| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 2639, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 117.37, |
| "eval_loss": 1.6928337812423706, |
| "eval_rouge1": 0.42757910674263067, |
| "eval_rouge2": 0.15720196581442164, |
| "eval_rougeL": 0.256418241790792, |
| "eval_rougeLsum": 0.35382355041095964, |
| "eval_runtime": 1351.6766, |
| "eval_samples_per_second": 0.74, |
| "eval_steps_per_second": 0.047, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.3262599469496021, |
| "grad_norm": 3.7333056926727295, |
| "learning_rate": 2.8741879956059633e-06, |
| "loss": 1.7717, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 116.608, |
| "eval_loss": 1.6410094499588013, |
| "eval_rouge1": 0.4286897823447751, |
| "eval_rouge2": 0.15765392626100322, |
| "eval_rougeL": 0.25895063496135456, |
| "eval_rougeLsum": 0.3538857213392972, |
| "eval_runtime": 1170.2376, |
| "eval_samples_per_second": 0.855, |
| "eval_steps_per_second": 0.054, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.6525198938992043, |
| "grad_norm": 4.453255653381348, |
| "learning_rate": 2.514178628889199e-06, |
| "loss": 1.5895, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 117.178, |
| "eval_loss": 1.6089894771575928, |
| "eval_rouge1": 0.42709156909402823, |
| "eval_rouge2": 0.1553778143480612, |
| "eval_rougeL": 0.25488136514137727, |
| "eval_rougeLsum": 0.3525718986787997, |
| "eval_runtime": 1384.4242, |
| "eval_samples_per_second": 0.722, |
| "eval_steps_per_second": 0.046, |
| "step": 1131 |
| }, |
| { |
| "epoch": 3.9787798408488064, |
| "grad_norm": 3.2527501583099365, |
| "learning_rate": 1.981320706041849e-06, |
| "loss": 1.5182, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 118.94, |
| "eval_loss": 1.597896933555603, |
| "eval_rouge1": 0.4321139550542189, |
| "eval_rouge2": 0.16019449900934246, |
| "eval_rougeL": 0.2577516929717559, |
| "eval_rougeLsum": 0.3566245729299692, |
| "eval_runtime": 1616.3945, |
| "eval_samples_per_second": 0.619, |
| "eval_steps_per_second": 0.039, |
| "step": 1508 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 116.603, |
| "eval_loss": 1.583080530166626, |
| "eval_rouge1": 0.43043600066735005, |
| "eval_rouge2": 0.16036196231543565, |
| "eval_rougeL": 0.2578722844279166, |
| "eval_rougeLsum": 0.35735455171087505, |
| "eval_runtime": 1754.8754, |
| "eval_samples_per_second": 0.57, |
| "eval_steps_per_second": 0.036, |
| "step": 1885 |
| }, |
| { |
| "epoch": 5.305039787798409, |
| "grad_norm": 3.344116687774658, |
| "learning_rate": 1.3651853437341204e-06, |
| "loss": 1.4547, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_gen_len": 116.67, |
| "eval_loss": 1.5650923252105713, |
| "eval_rouge1": 0.4348875537307479, |
| "eval_rouge2": 0.16312989568941516, |
| "eval_rougeL": 0.26082852541259915, |
| "eval_rougeLsum": 0.358369227586099, |
| "eval_runtime": 1733.5816, |
| "eval_samples_per_second": 0.577, |
| "eval_steps_per_second": 0.036, |
| "step": 2262 |
| }, |
| { |
| "epoch": 6.63129973474801, |
| "grad_norm": 3.3940606117248535, |
| "learning_rate": 7.721175674180988e-07, |
| "loss": 1.4188, |
| "step": 2500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_gen_len": 117.259, |
| "eval_loss": 1.5626862049102783, |
| "eval_rouge1": 0.43666460369707594, |
| "eval_rouge2": 0.16469845252758708, |
| "eval_rougeL": 0.26181956536662193, |
| "eval_rougeLsum": 0.3620078400661802, |
| "eval_runtime": 1745.3989, |
| "eval_samples_per_second": 0.573, |
| "eval_steps_per_second": 0.036, |
| "step": 2639 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 3770, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0123914662589235e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|