| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.425531914893618, | |
| "eval_steps": 300, | |
| "global_step": 720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9078014184397163, | |
| "grad_norm": 132.1505126953125, | |
| "learning_rate": 2.222222222222222e-07, | |
| "log_odds_chosen": 0.05492939054965973, | |
| "log_odds_ratio": -0.7323614954948425, | |
| "logits/chosen": -4.740067958831787, | |
| "logits/rejected": -4.963461399078369, | |
| "logps/chosen": -2.374514579772949, | |
| "logps/rejected": -2.4533467292785645, | |
| "loss": 2.8785, | |
| "nll_loss": 2.7699854373931885, | |
| "rewards/accuracies": 0.48828125, | |
| "rewards/chosen": -0.3561772108078003, | |
| "rewards/margins": 0.011824802495539188, | |
| "rewards/rejected": -0.3680019676685333, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 1.8156028368794326, | |
| "grad_norm": 45.387813568115234, | |
| "learning_rate": 4.444444444444444e-07, | |
| "log_odds_chosen": 0.22124934196472168, | |
| "log_odds_ratio": -0.6725601553916931, | |
| "logits/chosen": -5.0936760902404785, | |
| "logits/rejected": -5.33966588973999, | |
| "logps/chosen": -1.732269287109375, | |
| "logps/rejected": -1.9329001903533936, | |
| "loss": 2.2151, | |
| "nll_loss": 2.172783851623535, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": -0.2598403990268707, | |
| "rewards/margins": 0.030094601213932037, | |
| "rewards/rejected": -0.28993502259254456, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 2.723404255319149, | |
| "grad_norm": 19.709226608276367, | |
| "learning_rate": 4.983095894354857e-07, | |
| "log_odds_chosen": 0.2306685447692871, | |
| "log_odds_ratio": -0.658535361289978, | |
| "logits/chosen": -4.936949729919434, | |
| "logits/rejected": -5.155893802642822, | |
| "logps/chosen": -1.4097586870193481, | |
| "logps/rejected": -1.5965328216552734, | |
| "loss": 1.8908, | |
| "nll_loss": 1.8126921653747559, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": -0.2114638090133667, | |
| "rewards/margins": 0.028016118332743645, | |
| "rewards/rejected": -0.2394799143075943, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 3.631205673758865, | |
| "grad_norm": 20.5742130279541, | |
| "learning_rate": 4.908427196539701e-07, | |
| "log_odds_chosen": 0.3416966497898102, | |
| "log_odds_ratio": -0.5978461503982544, | |
| "logits/chosen": -4.871417999267578, | |
| "logits/rejected": -5.006246566772461, | |
| "logps/chosen": -1.3338335752487183, | |
| "logps/rejected": -1.595802903175354, | |
| "loss": 1.7492, | |
| "nll_loss": 1.6234831809997559, | |
| "rewards/accuracies": 0.66015625, | |
| "rewards/chosen": -0.20007506012916565, | |
| "rewards/margins": 0.039295390248298645, | |
| "rewards/rejected": -0.2393704503774643, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 4.539007092198582, | |
| "grad_norm": 19.219451904296875, | |
| "learning_rate": 4.775907352415367e-07, | |
| "log_odds_chosen": 0.4098852872848511, | |
| "log_odds_ratio": -0.5668885111808777, | |
| "logits/chosen": -4.7395429611206055, | |
| "logits/rejected": -4.919832229614258, | |
| "logps/chosen": -1.251634955406189, | |
| "logps/rejected": -1.5617362260818481, | |
| "loss": 1.6591, | |
| "nll_loss": 1.5759321451187134, | |
| "rewards/accuracies": 0.72265625, | |
| "rewards/chosen": -0.18774525821208954, | |
| "rewards/margins": 0.04651518166065216, | |
| "rewards/rejected": -0.2342604398727417, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 5.446808510638298, | |
| "grad_norm": 25.844369888305664, | |
| "learning_rate": 4.588719528532341e-07, | |
| "log_odds_chosen": 0.4376165568828583, | |
| "log_odds_ratio": -0.5644897222518921, | |
| "logits/chosen": -4.674585342407227, | |
| "logits/rejected": -4.810555934906006, | |
| "logps/chosen": -1.2456402778625488, | |
| "logps/rejected": -1.5732855796813965, | |
| "loss": 1.5996, | |
| "nll_loss": 1.4974051713943481, | |
| "rewards/accuracies": 0.73828125, | |
| "rewards/chosen": -0.1868460476398468, | |
| "rewards/margins": 0.04914678633213043, | |
| "rewards/rejected": -0.23599283397197723, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 6.3546099290780145, | |
| "grad_norm": 23.09563636779785, | |
| "learning_rate": 4.3513600327725117e-07, | |
| "log_odds_chosen": 0.3738464415073395, | |
| "log_odds_ratio": -0.5867234468460083, | |
| "logits/chosen": -4.663087844848633, | |
| "logits/rejected": -4.844013214111328, | |
| "logps/chosen": -1.3138737678527832, | |
| "logps/rejected": -1.5835403203964233, | |
| "loss": 1.5411, | |
| "nll_loss": 1.4685286283493042, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -0.197081059217453, | |
| "rewards/margins": 0.04045000299811363, | |
| "rewards/rejected": -0.23753106594085693, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 7.26241134751773, | |
| "grad_norm": 23.646638870239258, | |
| "learning_rate": 4.0695303116802467e-07, | |
| "log_odds_chosen": 0.46367794275283813, | |
| "log_odds_ratio": -0.547984778881073, | |
| "logits/chosen": -4.620482921600342, | |
| "logits/rejected": -4.765042781829834, | |
| "logps/chosen": -1.307213544845581, | |
| "logps/rejected": -1.65544593334198, | |
| "loss": 1.498, | |
| "nll_loss": 1.4461973905563354, | |
| "rewards/accuracies": 0.75390625, | |
| "rewards/chosen": -0.19608205556869507, | |
| "rewards/margins": 0.05223485454916954, | |
| "rewards/rejected": -0.2483169138431549, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 8.170212765957446, | |
| "grad_norm": 23.585857391357422, | |
| "learning_rate": 3.75e-07, | |
| "log_odds_chosen": 0.4542897343635559, | |
| "log_odds_ratio": -0.5773134827613831, | |
| "logits/chosen": -4.633105278015137, | |
| "logits/rejected": -4.810471057891846, | |
| "logps/chosen": -1.3886733055114746, | |
| "logps/rejected": -1.7219102382659912, | |
| "loss": 1.4686, | |
| "nll_loss": 1.3969916105270386, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -0.20830100774765015, | |
| "rewards/margins": 0.049985550343990326, | |
| "rewards/rejected": -0.2582865357398987, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 8.51063829787234, | |
| "eval_log_odds_chosen": 1.6898525953292847, | |
| "eval_log_odds_ratio": -0.19230316579341888, | |
| "eval_logits/chosen": -4.930749416351318, | |
| "eval_logits/rejected": -4.758046627044678, | |
| "eval_logps/chosen": -1.4714246988296509, | |
| "eval_logps/rejected": -2.9621574878692627, | |
| "eval_loss": 1.3246647119522095, | |
| "eval_nll_loss": 1.4632530212402344, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.22071371972560883, | |
| "eval_rewards/margins": 0.22360996901988983, | |
| "eval_rewards/rejected": -0.44432368874549866, | |
| "eval_runtime": 0.6144, | |
| "eval_samples_per_second": 222.965, | |
| "eval_steps_per_second": 4.882, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 9.078014184397164, | |
| "grad_norm": 20.154146194458008, | |
| "learning_rate": 3.400444312011776e-07, | |
| "log_odds_chosen": 0.409349262714386, | |
| "log_odds_ratio": -0.5844379663467407, | |
| "logits/chosen": -4.599703311920166, | |
| "logits/rejected": -4.766429901123047, | |
| "logps/chosen": -1.3831363916397095, | |
| "logps/rejected": -1.6733564138412476, | |
| "loss": 1.4397, | |
| "nll_loss": 1.4065345525741577, | |
| "rewards/accuracies": 0.74609375, | |
| "rewards/chosen": -0.20747046172618866, | |
| "rewards/margins": 0.04353303089737892, | |
| "rewards/rejected": -0.2510034739971161, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 9.98581560283688, | |
| "grad_norm": 23.09050750732422, | |
| "learning_rate": 3.029259680573527e-07, | |
| "log_odds_chosen": 0.43665847182273865, | |
| "log_odds_ratio": -0.5905143618583679, | |
| "logits/chosen": -4.58922815322876, | |
| "logits/rejected": -4.685288906097412, | |
| "logps/chosen": -1.458475112915039, | |
| "logps/rejected": -1.7894960641860962, | |
| "loss": 1.4285, | |
| "nll_loss": 1.3732693195343018, | |
| "rewards/accuracies": 0.73046875, | |
| "rewards/chosen": -0.21877126395702362, | |
| "rewards/margins": 0.04965316504240036, | |
| "rewards/rejected": -0.26842445135116577, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 10.893617021276595, | |
| "grad_norm": 35.926055908203125, | |
| "learning_rate": 2.6453620722761895e-07, | |
| "log_odds_chosen": 0.6511461138725281, | |
| "log_odds_ratio": -0.49195483326911926, | |
| "logits/chosen": -4.608173370361328, | |
| "logits/rejected": -4.685794830322266, | |
| "logps/chosen": -1.3694053888320923, | |
| "logps/rejected": -1.8711962699890137, | |
| "loss": 1.4144, | |
| "nll_loss": 1.374709129333496, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -0.20541077852249146, | |
| "rewards/margins": 0.07526866346597672, | |
| "rewards/rejected": -0.28067946434020996, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 11.801418439716311, | |
| "grad_norm": 33.8105583190918, | |
| "learning_rate": 2.2579728232420523e-07, | |
| "log_odds_chosen": 0.5499828457832336, | |
| "log_odds_ratio": -0.5233615040779114, | |
| "logits/chosen": -4.537787437438965, | |
| "logits/rejected": -4.662774085998535, | |
| "logps/chosen": -1.3898181915283203, | |
| "logps/rejected": -1.7920804023742676, | |
| "loss": 1.4016, | |
| "nll_loss": 1.3631547689437866, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -0.20847272872924805, | |
| "rewards/margins": 0.06033932417631149, | |
| "rewards/rejected": -0.26881206035614014, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 12.709219858156029, | |
| "grad_norm": 25.557348251342773, | |
| "learning_rate": 1.8763971398550467e-07, | |
| "log_odds_chosen": 0.5377756357192993, | |
| "log_odds_ratio": -0.5508320927619934, | |
| "logits/chosen": -4.532352447509766, | |
| "logits/rejected": -4.629130840301514, | |
| "logps/chosen": -1.3977127075195312, | |
| "logps/rejected": -1.7851612567901611, | |
| "loss": 1.3943, | |
| "nll_loss": 1.3304414749145508, | |
| "rewards/accuracies": 0.77734375, | |
| "rewards/chosen": -0.20965692400932312, | |
| "rewards/margins": 0.05811727046966553, | |
| "rewards/rejected": -0.26777422428131104, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 13.617021276595745, | |
| "grad_norm": 28.700815200805664, | |
| "learning_rate": 1.5098005849021078e-07, | |
| "log_odds_chosen": 0.5411101579666138, | |
| "log_odds_ratio": -0.5445564985275269, | |
| "logits/chosen": -4.501680850982666, | |
| "logits/rejected": -4.677550315856934, | |
| "logps/chosen": -1.3654242753982544, | |
| "logps/rejected": -1.7541980743408203, | |
| "loss": 1.401, | |
| "nll_loss": 1.2766036987304688, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -0.2048136293888092, | |
| "rewards/margins": 0.05831605941057205, | |
| "rewards/rejected": -0.26312971115112305, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 14.52482269503546, | |
| "grad_norm": 35.40031814575195, | |
| "learning_rate": 1.1669889179957723e-07, | |
| "log_odds_chosen": 0.7372524738311768, | |
| "log_odds_ratio": -0.46363916993141174, | |
| "logits/chosen": -4.513700485229492, | |
| "logits/rejected": -4.619227886199951, | |
| "logps/chosen": -1.3301138877868652, | |
| "logps/rejected": -1.8859204053878784, | |
| "loss": 1.3839, | |
| "nll_loss": 1.219886302947998, | |
| "rewards/accuracies": 0.87109375, | |
| "rewards/chosen": -0.19951710104942322, | |
| "rewards/margins": 0.08337096124887466, | |
| "rewards/rejected": -0.2828880548477173, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 15.432624113475176, | |
| "grad_norm": 62.16829299926758, | |
| "learning_rate": 8.561965785773412e-08, | |
| "log_odds_chosen": 0.661382794380188, | |
| "log_odds_ratio": -0.4891131520271301, | |
| "logits/chosen": -4.506048202514648, | |
| "logits/rejected": -4.587852478027344, | |
| "logps/chosen": -1.3864898681640625, | |
| "logps/rejected": -1.8775601387023926, | |
| "loss": 1.3876, | |
| "nll_loss": 1.2974672317504883, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -0.20797351002693176, | |
| "rewards/margins": 0.07366053014993668, | |
| "rewards/rejected": -0.28163403272628784, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 16.340425531914892, | |
| "grad_norm": 29.107358932495117, | |
| "learning_rate": 5.848888922025552e-08, | |
| "log_odds_chosen": 0.6269708275794983, | |
| "log_odds_ratio": -0.49757176637649536, | |
| "logits/chosen": -4.460994243621826, | |
| "logits/rejected": -4.661521911621094, | |
| "logps/chosen": -1.3339214324951172, | |
| "logps/rejected": -1.7924858331680298, | |
| "loss": 1.3923, | |
| "nll_loss": 1.2958626747131348, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.200088232755661, | |
| "rewards/margins": 0.06878463923931122, | |
| "rewards/rejected": -0.26887285709381104, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 17.02127659574468, | |
| "eval_log_odds_chosen": 1.7557824850082397, | |
| "eval_log_odds_ratio": -0.18494771420955658, | |
| "eval_logits/chosen": -4.790639400482178, | |
| "eval_logits/rejected": -4.577674865722656, | |
| "eval_logps/chosen": -1.626247763633728, | |
| "eval_logps/rejected": -3.2115631103515625, | |
| "eval_loss": 1.2619013786315918, | |
| "eval_nll_loss": 1.4078196287155151, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.2439371794462204, | |
| "eval_rewards/margins": 0.2377973347902298, | |
| "eval_rewards/rejected": -0.4817345142364502, | |
| "eval_runtime": 0.6115, | |
| "eval_samples_per_second": 224.05, | |
| "eval_steps_per_second": 4.906, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 17.24822695035461, | |
| "grad_norm": 29.23589515686035, | |
| "learning_rate": 3.5958275117433404e-08, | |
| "log_odds_chosen": 0.5763309001922607, | |
| "log_odds_ratio": -0.5261004567146301, | |
| "logits/chosen": -4.398637294769287, | |
| "logits/rejected": -4.560643672943115, | |
| "logps/chosen": -1.3885968923568726, | |
| "logps/rejected": -1.8019691705703735, | |
| "loss": 1.3886, | |
| "nll_loss": 1.3023698329925537, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -0.20828954875469208, | |
| "rewards/margins": 0.06200582906603813, | |
| "rewards/rejected": -0.2702953815460205, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 18.156028368794328, | |
| "grad_norm": 27.693330764770508, | |
| "learning_rate": 1.8569007682777415e-08, | |
| "log_odds_chosen": 0.7424343824386597, | |
| "log_odds_ratio": -0.46295538544654846, | |
| "logits/chosen": -4.579552173614502, | |
| "logits/rejected": -4.691650390625, | |
| "logps/chosen": -1.3507909774780273, | |
| "logps/rejected": -1.9150110483169556, | |
| "loss": 1.3865, | |
| "nll_loss": 1.3111711740493774, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -0.20261868834495544, | |
| "rewards/margins": 0.084633007645607, | |
| "rewards/rejected": -0.28725165128707886, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 19.06382978723404, | |
| "grad_norm": 37.925621032714844, | |
| "learning_rate": 6.738782355044048e-09, | |
| "log_odds_chosen": 0.6857459545135498, | |
| "log_odds_ratio": -0.4916977882385254, | |
| "logits/chosen": -4.52652645111084, | |
| "logits/rejected": -4.689857482910156, | |
| "logps/chosen": -1.341786503791809, | |
| "logps/rejected": -1.8565285205841064, | |
| "loss": 1.3794, | |
| "nll_loss": 1.2754034996032715, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -0.20126797258853912, | |
| "rewards/margins": 0.07721129059791565, | |
| "rewards/rejected": -0.27847927808761597, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 19.97163120567376, | |
| "grad_norm": 24.041799545288086, | |
| "learning_rate": 7.51764708051994e-10, | |
| "log_odds_chosen": 0.6411248445510864, | |
| "log_odds_ratio": -0.5100895762443542, | |
| "logits/chosen": -4.384097099304199, | |
| "logits/rejected": -4.515219688415527, | |
| "logps/chosen": -1.3920109272003174, | |
| "logps/rejected": -1.8711614608764648, | |
| "loss": 1.3805, | |
| "nll_loss": 1.2700397968292236, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -0.20880162715911865, | |
| "rewards/margins": 0.07187257707118988, | |
| "rewards/rejected": -0.2806742191314697, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 20.425531914893618, | |
| "grad_norm": 28.226720809936523, | |
| "learning_rate": 0.0, | |
| "log_odds_chosen": 0.6200518608093262, | |
| "log_odds_ratio": -0.529932975769043, | |
| "logits/chosen": -4.434691905975342, | |
| "logits/rejected": -4.575813293457031, | |
| "logps/chosen": -1.416117548942566, | |
| "logps/rejected": -1.8715832233428955, | |
| "loss": 1.3893, | |
| "nll_loss": 1.2817054986953735, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.2124176323413849, | |
| "rewards/margins": 0.0683198943734169, | |
| "rewards/rejected": -0.2807375192642212, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 20.425531914893618, | |
| "eval_log_odds_chosen": 1.7479673624038696, | |
| "eval_log_odds_ratio": -0.1867920309305191, | |
| "eval_logits/chosen": -4.75565767288208, | |
| "eval_logits/rejected": -4.538194179534912, | |
| "eval_logps/chosen": -1.6300764083862305, | |
| "eval_logps/rejected": -3.2087719440460205, | |
| "eval_loss": 1.2522811889648438, | |
| "eval_nll_loss": 1.4028778076171875, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.24451148509979248, | |
| "eval_rewards/margins": 0.23680436611175537, | |
| "eval_rewards/rejected": -0.48131585121154785, | |
| "eval_runtime": 0.615, | |
| "eval_samples_per_second": 222.778, | |
| "eval_steps_per_second": 4.878, | |
| "step": 720 | |
| } | |
| ], | |
| "logging_steps": 32, | |
| "max_steps": 720, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 21, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |