| { | |
| "best_metric": 0.024393858388066292, | |
| "best_model_checkpoint": "/netscratch/butt/Transliterate/RUP/finetuning/data/output_models/with_ur_rur_pretraining/m2m100_ur_rur/checkpoint-98957", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 98957, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16168638903766283, | |
| "grad_norm": 0.7672635912895203, | |
| "learning_rate": 1.7955627413994434e-06, | |
| "loss": 3.2537, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.32337277807532566, | |
| "grad_norm": 0.197320356965065, | |
| "learning_rate": 3.592023713284829e-06, | |
| "loss": 0.1013, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.48505916711298847, | |
| "grad_norm": 0.1217622384428978, | |
| "learning_rate": 5.388035569927243e-06, | |
| "loss": 0.0622, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.6467455561506513, | |
| "grad_norm": 0.13456104695796967, | |
| "learning_rate": 7.184047426569658e-06, | |
| "loss": 0.0481, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.8084319451883141, | |
| "grad_norm": 0.15323583781719208, | |
| "learning_rate": 8.980059283212073e-06, | |
| "loss": 0.0411, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.9701183342259769, | |
| "grad_norm": 0.10961552709341049, | |
| "learning_rate": 9.998163154071579e-06, | |
| "loss": 0.0368, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.9999898946006851, | |
| "eval_bleu_score": 89.54460906982422, | |
| "eval_loss": 0.027495749294757843, | |
| "eval_runtime": 95.4142, | |
| "eval_samples_per_second": 47.163, | |
| "eval_steps_per_second": 0.744, | |
| "step": 24739 | |
| }, | |
| { | |
| "epoch": 1.1318047232636397, | |
| "grad_norm": 0.0804838314652443, | |
| "learning_rate": 9.979866450572228e-06, | |
| "loss": 0.0339, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.2934911123013026, | |
| "grad_norm": 0.08419705927371979, | |
| "learning_rate": 9.941985543338884e-06, | |
| "loss": 0.0321, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.4551775013389654, | |
| "grad_norm": 0.06364738196134567, | |
| "learning_rate": 9.88469246388591e-06, | |
| "loss": 0.0311, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.6168638903766284, | |
| "grad_norm": 0.19403564929962158, | |
| "learning_rate": 9.808226815651367e-06, | |
| "loss": 0.0301, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.778550279414291, | |
| "grad_norm": 0.08095328509807587, | |
| "learning_rate": 9.712850825850488e-06, | |
| "loss": 0.0295, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.9402366684519539, | |
| "grad_norm": 0.05232414975762367, | |
| "learning_rate": 9.598956005105698e-06, | |
| "loss": 0.029, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.9999797892013702, | |
| "eval_bleu_score": 89.95458984375, | |
| "eval_loss": 0.02493358589708805, | |
| "eval_runtime": 92.9895, | |
| "eval_samples_per_second": 48.393, | |
| "eval_steps_per_second": 0.764, | |
| "step": 49478 | |
| }, | |
| { | |
| "epoch": 2.101923057489617, | |
| "grad_norm": 0.1178918406367302, | |
| "learning_rate": 9.467060328243924e-06, | |
| "loss": 0.0284, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.2636094465272794, | |
| "grad_norm": 0.06169985607266426, | |
| "learning_rate": 9.317550119060927e-06, | |
| "loss": 0.028, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.4252958355649423, | |
| "grad_norm": 0.05251774191856384, | |
| "learning_rate": 9.151118083407196e-06, | |
| "loss": 0.0283, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.5869822246026053, | |
| "grad_norm": 0.1015240028500557, | |
| "learning_rate": 8.968287285850323e-06, | |
| "loss": 0.0275, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 2.748668613640268, | |
| "grad_norm": 0.06474316865205765, | |
| "learning_rate": 8.76990706667961e-06, | |
| "loss": 0.0273, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 2.910355002677931, | |
| "grad_norm": 0.07183582335710526, | |
| "learning_rate": 8.556823439595787e-06, | |
| "loss": 0.0272, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.9999696838020555, | |
| "eval_bleu_score": 90.0082015991211, | |
| "eval_loss": 0.02451913431286812, | |
| "eval_runtime": 92.6435, | |
| "eval_samples_per_second": 48.573, | |
| "eval_steps_per_second": 0.766, | |
| "step": 74217 | |
| }, | |
| { | |
| "epoch": 3.0720413917155938, | |
| "grad_norm": 0.04460311308503151, | |
| "learning_rate": 8.329660528736868e-06, | |
| "loss": 0.0268, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.2337277807532563, | |
| "grad_norm": 0.047645051032304764, | |
| "learning_rate": 8.08941394035222e-06, | |
| "loss": 0.0266, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.3954141697909193, | |
| "grad_norm": 0.04565703496336937, | |
| "learning_rate": 7.83709225538658e-06, | |
| "loss": 0.0264, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 3.5571005588285822, | |
| "grad_norm": 0.06948993355035782, | |
| "learning_rate": 7.573493564289276e-06, | |
| "loss": 0.0264, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 3.7187869478662448, | |
| "grad_norm": 0.041806410998106, | |
| "learning_rate": 7.2998470510947005e-06, | |
| "loss": 0.0263, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 3.8804733369039077, | |
| "grad_norm": 0.04544525593519211, | |
| "learning_rate": 7.017306895492585e-06, | |
| "loss": 0.0262, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu_score": 90.16093444824219, | |
| "eval_loss": 0.024393858388066292, | |
| "eval_runtime": 93.7037, | |
| "eval_samples_per_second": 48.024, | |
| "eval_steps_per_second": 0.758, | |
| "step": 98957 | |
| } | |
| ], | |
| "logging_steps": 4000, | |
| "max_steps": 222651, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.352425921214939e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |