{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.35335689045936397, "eval_steps": 1000000000, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0353356890459364, "grad_norm": 3.154219735887708, "learning_rate": 1.4084507042253522e-07, "loss": 0.4719, "step": 10 }, { "epoch": 0.0706713780918728, "grad_norm": 3.253051238982522, "learning_rate": 2.8169014084507043e-07, "loss": 0.4961, "step": 20 }, { "epoch": 0.10600706713780919, "grad_norm": 2.7267245326428076, "learning_rate": 4.225352112676056e-07, "loss": 0.4764, "step": 30 }, { "epoch": 0.1413427561837456, "grad_norm": 2.297373638518878, "learning_rate": 5.633802816901409e-07, "loss": 0.4421, "step": 40 }, { "epoch": 0.17667844522968199, "grad_norm": 1.9288497922352652, "learning_rate": 7.04225352112676e-07, "loss": 0.4341, "step": 50 }, { "epoch": 0.21201413427561838, "grad_norm": 1.352449199460462, "learning_rate": 8.450704225352112e-07, "loss": 0.4004, "step": 60 }, { "epoch": 0.24734982332155478, "grad_norm": 1.266334407111366, "learning_rate": 9.859154929577465e-07, "loss": 0.3867, "step": 70 }, { "epoch": 0.2826855123674912, "grad_norm": 1.162984044699381, "learning_rate": 9.998893604965111e-07, "loss": 0.3635, "step": 80 }, { "epoch": 0.31802120141342755, "grad_norm": 1.1058950535521728, "learning_rate": 9.995069658160579e-07, "loss": 0.3547, "step": 90 }, { "epoch": 0.35335689045936397, "grad_norm": 1.0950308921935448, "learning_rate": 9.988516589154665e-07, "loss": 0.3594, "step": 100 } ], "logging_steps": 10, "max_steps": 1415, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 29135731163136.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }