{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 1170, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0614222863931305, "eval_loss": 7.409209728240967, "eval_runtime": 18.3549, "eval_samples_per_second": 4.958, "eval_steps_per_second": 0.327, "step": 78 }, { "epoch": 2.0, "eval_bleu": 0.06781414925085748, "eval_loss": 6.072174072265625, "eval_runtime": 21.0214, "eval_samples_per_second": 4.329, "eval_steps_per_second": 0.285, "step": 156 }, { "epoch": 3.0, "eval_bleu": 0.07865444133157673, "eval_loss": 4.586818695068359, "eval_runtime": 16.8905, "eval_samples_per_second": 5.388, "eval_steps_per_second": 0.355, "step": 234 }, { "epoch": 4.0, "eval_bleu": 0.10492023143484121, "eval_loss": 2.7620725631713867, "eval_runtime": 20.9126, "eval_samples_per_second": 4.351, "eval_steps_per_second": 0.287, "step": 312 }, { "epoch": 5.0, "eval_bleu": 7.870773986323496e-05, "eval_loss": 1.7986291646957397, "eval_runtime": 28.9333, "eval_samples_per_second": 3.145, "eval_steps_per_second": 0.207, "step": 390 }, { "epoch": 6.0, "eval_bleu": 4.7698260005659636e-32, "eval_loss": 1.7516156435012817, "eval_runtime": 29.6724, "eval_samples_per_second": 3.067, "eval_steps_per_second": 0.202, "step": 468 }, { "epoch": 6.410256410256411, "grad_norm": 0.5151189565658569, "learning_rate": 2.976e-05, "loss": 4.7498, "step": 500 }, { "epoch": 7.0, "eval_bleu": 0.05376879163106575, "eval_loss": 1.7408103942871094, "eval_runtime": 28.1805, "eval_samples_per_second": 3.229, "eval_steps_per_second": 0.213, "step": 546 }, { "epoch": 8.0, "eval_bleu": 22.551967047591756, "eval_loss": 1.7349852323532104, "eval_runtime": 24.3775, "eval_samples_per_second": 3.733, "eval_steps_per_second": 0.246, "step": 624 }, { "epoch": 9.0, "eval_bleu": 27.560704428979403, "eval_loss": 1.7364959716796875, "eval_runtime": 19.0116, "eval_samples_per_second": 4.787, "eval_steps_per_second": 0.316, "step": 702 }, { "epoch": 10.0, "eval_bleu": 29.12683290388149, "eval_loss": 1.7374337911605835, "eval_runtime": 15.4603, "eval_samples_per_second": 5.886, "eval_steps_per_second": 0.388, "step": 780 }, { "epoch": 11.0, "eval_bleu": 29.408921311422052, "eval_loss": 1.7404249906539917, "eval_runtime": 16.9631, "eval_samples_per_second": 5.365, "eval_steps_per_second": 0.354, "step": 858 }, { "epoch": 12.0, "eval_bleu": 30.17855336606261, "eval_loss": 1.7426787614822388, "eval_runtime": 12.8597, "eval_samples_per_second": 7.076, "eval_steps_per_second": 0.467, "step": 936 }, { "epoch": 12.820512820512821, "grad_norm": 0.24796663224697113, "learning_rate": 7.791044776119404e-06, "loss": 1.5765, "step": 1000 }, { "epoch": 13.0, "eval_bleu": 30.17706557207897, "eval_loss": 1.7429455518722534, "eval_runtime": 12.8783, "eval_samples_per_second": 7.066, "eval_steps_per_second": 0.466, "step": 1014 }, { "epoch": 14.0, "eval_bleu": 30.818742074203392, "eval_loss": 1.746609091758728, "eval_runtime": 12.9695, "eval_samples_per_second": 7.016, "eval_steps_per_second": 0.463, "step": 1092 }, { "epoch": 15.0, "eval_bleu": 30.929249185720632, "eval_loss": 1.7464464902877808, "eval_runtime": 12.8549, "eval_samples_per_second": 7.079, "eval_steps_per_second": 0.467, "step": 1170 } ], "logging_steps": 500, "max_steps": 1170, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.009328968433664e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }