{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6666666666666666, "grad_norm": 9.75, "learning_rate": 1.9491669984901377e-05, "loss": 0.6663, "mean_token_accuracy": 0.8282047653198242, "num_input_tokens_seen": 2327984, "num_tokens": 2327984.0, "step": 50 }, { "epoch": 1.3333333333333333, "grad_norm": 17.25, "learning_rate": 1.72967916579403e-05, "loss": 0.3542, "mean_token_accuracy": 0.9049889421463013, "num_input_tokens_seen": 5204816, "num_tokens": 5204816.0, "step": 100 }, { "epoch": 2.0, "grad_norm": 3.25, "learning_rate": 1.3756762552443555e-05, "loss": 0.0478, "mean_token_accuracy": 0.9875464010238647, "num_input_tokens_seen": 7271696, "num_tokens": 7271696.0, "step": 150 }, { "epoch": 2.6666666666666665, "grad_norm": 2.171875, "learning_rate": 9.524180841762577e-06, "loss": 0.014, "mean_token_accuracy": 0.9964023971557617, "num_input_tokens_seen": 9599680, "num_tokens": 9599680.0, "step": 200 }, { "epoch": 3.3333333333333335, "grad_norm": 0.00946044921875, "learning_rate": 5.379315560596038e-06, "loss": 0.0023, "mean_token_accuracy": 0.9995168459415436, "num_input_tokens_seen": 12476512, "num_tokens": 12476512.0, "step": 250 }, { "epoch": 4.0, "grad_norm": 0.169921875, "learning_rate": 2.0862653732958914e-06, "loss": 0.0002, "mean_token_accuracy": 0.9999828958511352, "num_input_tokens_seen": 14543392, "num_tokens": 14543392.0, "step": 300 }, { "epoch": 4.666666666666667, "grad_norm": 0.0380859375, "learning_rate": 2.520983216615047e-07, "loss": 0.0001, "mean_token_accuracy": 1.0, "num_input_tokens_seen": 16871376, "num_tokens": 16871376.0, "step": 350 }, { "epoch": 5.0, "mean_token_accuracy": 1.0, "num_input_tokens_seen": 18179240, "num_tokens": 18179240.0, "step": 375, "total_flos": 1.0562823256014848e+17, "train_loss": 0.14466818872839213, "train_runtime": 6016.1372, "train_samples_per_second": 0.499, "train_steps_per_second": 0.062, "train_tokens_per_second": 176.962 } ], "logging_steps": 50, "max_steps": 375, "num_input_tokens_seen": 18179240, "num_train_epochs": 5, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0562823256014848e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }