{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 185, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.273972602739726, "grad_norm": 1.6497408854602102, "learning_rate": 2.368421052631579e-05, "loss": 1.0321, "step": 10 }, { "epoch": 0.547945205479452, "grad_norm": 0.6859212588324088, "learning_rate": 5e-05, "loss": 0.8966, "step": 20 }, { "epoch": 0.821917808219178, "grad_norm": 0.2516470875520477, "learning_rate": 4.698795180722892e-05, "loss": 0.6897, "step": 30 }, { "epoch": 1.0821917808219177, "grad_norm": 0.2261853361717197, "learning_rate": 4.3975903614457834e-05, "loss": 0.6292, "step": 40 }, { "epoch": 1.356164383561644, "grad_norm": 0.23723422604514383, "learning_rate": 4.0963855421686746e-05, "loss": 0.6211, "step": 50 }, { "epoch": 1.6301369863013697, "grad_norm": 0.15035498586842147, "learning_rate": 3.7951807228915666e-05, "loss": 0.5986, "step": 60 }, { "epoch": 1.904109589041096, "grad_norm": 0.16842522281861053, "learning_rate": 3.4939759036144585e-05, "loss": 0.5873, "step": 70 }, { "epoch": 2.1643835616438354, "grad_norm": 0.14474675376472765, "learning_rate": 3.192771084337349e-05, "loss": 0.5596, "step": 80 }, { "epoch": 2.4383561643835616, "grad_norm": 0.1755616933621088, "learning_rate": 2.891566265060241e-05, "loss": 0.5514, "step": 90 }, { "epoch": 2.712328767123288, "grad_norm": 0.16231431484379702, "learning_rate": 2.5903614457831325e-05, "loss": 0.571, "step": 100 }, { "epoch": 2.9863013698630136, "grad_norm": 0.1850816566521127, "learning_rate": 2.289156626506024e-05, "loss": 0.558, "step": 110 }, { "epoch": 3.2465753424657535, "grad_norm": 0.16463318012909617, "learning_rate": 1.9879518072289157e-05, "loss": 0.5441, "step": 120 }, { "epoch": 3.5205479452054793, "grad_norm": 0.2155679263802774, "learning_rate": 1.6867469879518073e-05, "loss": 0.5174, "step": 130 }, { "epoch": 3.7945205479452055, "grad_norm": 0.171589313018536, "learning_rate": 1.3855421686746989e-05, "loss": 0.5455, "step": 140 }, { "epoch": 4.054794520547945, "grad_norm": 0.2091422354146189, "learning_rate": 1.0843373493975904e-05, "loss": 0.5697, "step": 150 }, { "epoch": 4.328767123287671, "grad_norm": 0.20760472844880373, "learning_rate": 7.83132530120482e-06, "loss": 0.5293, "step": 160 }, { "epoch": 4.602739726027397, "grad_norm": 0.24000797818211547, "learning_rate": 4.819277108433735e-06, "loss": 0.548, "step": 170 }, { "epoch": 4.876712328767123, "grad_norm": 0.20149227384248072, "learning_rate": 1.8072289156626506e-06, "loss": 0.5152, "step": 180 }, { "epoch": 5.0, "step": 185, "total_flos": 110105977683968.0, "train_loss": 0.6138813560073441, "train_runtime": 642.1229, "train_samples_per_second": 2.266, "train_steps_per_second": 0.288 } ], "logging_steps": 10, "max_steps": 185, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 110105977683968.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }