{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 22854,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.978121991773869e-05,
      "loss": 6.7838,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.956243983547738e-05,
      "loss": 6.1059,
      "step": 200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9343659753216074e-05,
      "loss": 5.9324,
      "step": 300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.912487967095476e-05,
      "loss": 5.7856,
      "step": 400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.890609958869345e-05,
      "loss": 5.7135,
      "step": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.868731950643214e-05,
      "loss": 5.6541,
      "step": 600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.846853942417083e-05,
      "loss": 5.5545,
      "step": 700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.824975934190952e-05,
      "loss": 5.5722,
      "step": 800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.803097925964821e-05,
      "loss": 5.4627,
      "step": 900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.7812199177386893e-05,
      "loss": 5.4158,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.759341909512558e-05,
      "loss": 5.3269,
      "step": 1100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.737463901286427e-05,
      "loss": 5.3289,
      "step": 1200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.715585893060296e-05,
      "loss": 5.3057,
      "step": 1300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.693707884834165e-05,
      "loss": 5.2426,
      "step": 1400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.671829876608034e-05,
      "loss": 5.1834,
      "step": 1500
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.649951868381903e-05,
      "loss": 5.1129,
      "step": 1600
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.628073860155771e-05,
      "loss": 5.1133,
      "step": 1700
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.6061958519296404e-05,
      "loss": 4.9882,
      "step": 1800
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.5843178437035094e-05,
      "loss": 5.0315,
      "step": 1900
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.5624398354773784e-05,
      "loss": 4.9776,
      "step": 2000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.5405618272512475e-05,
      "loss": 4.9737,
      "step": 2100
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.5186838190251165e-05,
      "loss": 4.8419,
      "step": 2200
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.496805810798985e-05,
      "loss": 4.8256,
      "step": 2300
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.474927802572854e-05,
      "loss": 4.8925,
      "step": 2400
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.453049794346723e-05,
      "loss": 4.7332,
      "step": 2500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.431171786120592e-05,
      "loss": 4.7318,
      "step": 2600
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.409293777894461e-05,
      "loss": 4.69,
      "step": 2700
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.3874157696683295e-05,
      "loss": 4.6975,
      "step": 2800
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.3655377614421985e-05,
      "loss": 4.7073,
      "step": 2900
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.3436597532160675e-05,
      "loss": 4.5963,
      "step": 3000
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.321781744989936e-05,
      "loss": 4.5638,
      "step": 3100
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.299903736763805e-05,
      "loss": 4.5798,
      "step": 3200
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.278025728537674e-05,
      "loss": 4.4876,
      "step": 3300
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.256147720311543e-05,
      "loss": 4.5954,
      "step": 3400
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.234269712085412e-05,
      "loss": 4.4998,
      "step": 3500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.212391703859281e-05,
      "loss": 4.495,
      "step": 3600
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.1905136956331495e-05,
      "loss": 4.537,
      "step": 3700
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.1686356874070186e-05,
      "loss": 4.4669,
      "step": 3800
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.1467576791808876e-05,
      "loss": 4.5145,
      "step": 3900
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.1248796709547566e-05,
      "loss": 4.3922,
      "step": 4000
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.103001662728626e-05,
      "loss": 4.4273,
      "step": 4100
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.081123654502495e-05,
      "loss": 4.3919,
      "step": 4200
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.059245646276363e-05,
      "loss": 4.3121,
      "step": 4300
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.037367638050232e-05,
      "loss": 4.419,
      "step": 4400
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.015489629824101e-05,
      "loss": 4.3461,
      "step": 4500
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.9936116215979696e-05,
      "loss": 4.2377,
      "step": 4600
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.9717336133718386e-05,
      "loss": 4.2837,
      "step": 4700
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.9498556051457077e-05,
      "loss": 4.2739,
      "step": 4800
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.927977596919577e-05,
      "loss": 4.2504,
      "step": 4900
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.906099588693445e-05,
      "loss": 4.2794,
      "step": 5000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.884221580467314e-05,
      "loss": 4.2125,
      "step": 5100
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.862343572241183e-05,
      "loss": 4.2143,
      "step": 5200
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.840465564015052e-05,
      "loss": 4.1628,
      "step": 5300
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.818587555788921e-05,
      "loss": 4.1445,
      "step": 5400
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.79670954756279e-05,
      "loss": 4.237,
      "step": 5500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.774831539336659e-05,
      "loss": 4.2284,
      "step": 5600
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.752953531110528e-05,
      "loss": 4.1451,
      "step": 5700
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.731075522884397e-05,
      "loss": 4.1644,
      "step": 5800
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.709197514658266e-05,
      "loss": 4.1351,
      "step": 5900
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.687319506432135e-05,
      "loss": 4.1229,
      "step": 6000
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.665441498206004e-05,
      "loss": 4.1166,
      "step": 6100
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.643563489979873e-05,
      "loss": 4.1468,
      "step": 6200
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.621685481753741e-05,
      "loss": 4.1475,
      "step": 6300
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.59980747352761e-05,
      "loss": 4.1488,
      "step": 6400
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.577929465301479e-05,
      "loss": 4.0431,
      "step": 6500
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.556051457075348e-05,
      "loss": 3.9611,
      "step": 6600
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.534173448849217e-05,
      "loss": 4.1072,
      "step": 6700
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.512295440623086e-05,
      "loss": 4.0247,
      "step": 6800
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.490417432396955e-05,
      "loss": 3.9853,
      "step": 6900
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.468539424170823e-05,
      "loss": 3.9586,
      "step": 7000
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.446661415944692e-05,
      "loss": 4.0029,
      "step": 7100
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.4247834077185614e-05,
      "loss": 4.0219,
      "step": 7200
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.4029053994924304e-05,
      "loss": 3.9708,
      "step": 7300
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.3810273912662995e-05,
      "loss": 3.9856,
      "step": 7400
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.3591493830401685e-05,
      "loss": 3.9354,
      "step": 7500
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.337271374814037e-05,
      "loss": 3.98,
      "step": 7600
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.315393366587906e-05,
      "loss": 3.9338,
      "step": 7700
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.293515358361775e-05,
      "loss": 3.9415,
      "step": 7800
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.271637350135644e-05,
      "loss": 3.9926,
      "step": 7900
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.249759341909513e-05,
      "loss": 3.9005,
      "step": 8000
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.227881333683382e-05,
      "loss": 3.9296,
      "step": 8100
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.2060033254572505e-05,
      "loss": 3.9252,
      "step": 8200
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.184125317231119e-05,
      "loss": 3.8741,
      "step": 8300
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.162247309004988e-05,
      "loss": 3.9018,
      "step": 8400
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.140369300778857e-05,
      "loss": 3.8671,
      "step": 8500
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.118491292552726e-05,
      "loss": 3.9379,
      "step": 8600
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.096613284326595e-05,
      "loss": 3.8542,
      "step": 8700
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.074735276100464e-05,
      "loss": 3.7767,
      "step": 8800
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.052857267874333e-05,
      "loss": 3.7247,
      "step": 8900
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0309792596482018e-05,
      "loss": 3.7847,
      "step": 9000
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.0091012514220705e-05,
      "loss": 3.7639,
      "step": 9100
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.9872232431959396e-05,
      "loss": 3.7938,
      "step": 9200
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.9653452349698086e-05,
      "loss": 3.7549,
      "step": 9300
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.9434672267436773e-05,
      "loss": 3.774,
      "step": 9400
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.9215892185175464e-05,
      "loss": 3.7728,
      "step": 9500
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.8997112102914154e-05,
      "loss": 3.7448,
      "step": 9600
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.877833202065284e-05,
      "loss": 3.7276,
      "step": 9700
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.8559551938391532e-05,
      "loss": 3.7897,
      "step": 9800
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8340771856130222e-05,
      "loss": 3.7273,
      "step": 9900
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.812199177386891e-05,
      "loss": 3.745,
      "step": 10000
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.7903211691607596e-05,
      "loss": 3.7491,
      "step": 10100
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.7684431609346283e-05,
      "loss": 3.8055,
      "step": 10200
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.7465651527084974e-05,
      "loss": 3.6932,
      "step": 10300
    },
    {
      "epoch": 1.37,
      "learning_rate": 2.7246871444823664e-05,
      "loss": 3.7565,
      "step": 10400
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.702809136256235e-05,
      "loss": 3.6891,
      "step": 10500
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.6809311280301042e-05,
      "loss": 3.7535,
      "step": 10600
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6590531198039732e-05,
      "loss": 3.5967,
      "step": 10700
    },
    {
      "epoch": 1.42,
      "learning_rate": 2.637175111577842e-05,
      "loss": 3.637,
      "step": 10800
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.615297103351711e-05,
      "loss": 3.6867,
      "step": 10900
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.59341909512558e-05,
      "loss": 3.6911,
      "step": 11000
    },
    {
      "epoch": 1.46,
      "learning_rate": 2.5715410868994487e-05,
      "loss": 3.6094,
      "step": 11100
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5496630786733178e-05,
      "loss": 3.6662,
      "step": 11200
    },
    {
      "epoch": 1.48,
      "learning_rate": 2.5277850704471868e-05,
      "loss": 3.6195,
      "step": 11300
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.5059070622210555e-05,
      "loss": 3.6519,
      "step": 11400
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.4840290539949242e-05,
      "loss": 3.6181,
      "step": 11500
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.4621510457687933e-05,
      "loss": 3.7262,
      "step": 11600
    },
    {
      "epoch": 1.54,
      "learning_rate": 2.4402730375426623e-05,
      "loss": 3.6216,
      "step": 11700
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.418395029316531e-05,
      "loss": 3.613,
      "step": 11800
    },
    {
      "epoch": 1.56,
      "learning_rate": 2.3965170210904e-05,
      "loss": 3.6809,
      "step": 11900
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.374639012864269e-05,
      "loss": 3.6462,
      "step": 12000
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.352761004638138e-05,
      "loss": 3.6063,
      "step": 12100
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.330882996412007e-05,
      "loss": 3.6304,
      "step": 12200
    },
    {
      "epoch": 1.61,
      "learning_rate": 2.309004988185876e-05,
      "loss": 3.6105,
      "step": 12300
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.2871269799597443e-05,
      "loss": 3.5885,
      "step": 12400
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.2652489717336133e-05,
      "loss": 3.6111,
      "step": 12500
    },
    {
      "epoch": 1.65,
      "learning_rate": 2.2433709635074824e-05,
      "loss": 3.572,
      "step": 12600
    },
    {
      "epoch": 1.67,
      "learning_rate": 2.221492955281351e-05,
      "loss": 3.6038,
      "step": 12700
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.19961494705522e-05,
      "loss": 3.6314,
      "step": 12800
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.1777369388290892e-05,
      "loss": 3.5771,
      "step": 12900
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.1558589306029582e-05,
      "loss": 3.5381,
      "step": 13000
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.133980922376827e-05,
      "loss": 3.5831,
      "step": 13100
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.112102914150696e-05,
      "loss": 3.5452,
      "step": 13200
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.0902249059245647e-05,
      "loss": 3.6384,
      "step": 13300
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.0683468976984334e-05,
      "loss": 3.5737,
      "step": 13400
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.0464688894723024e-05,
      "loss": 3.684,
      "step": 13500
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.0245908812461715e-05,
      "loss": 3.6435,
      "step": 13600
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.0027128730200402e-05,
      "loss": 3.5359,
      "step": 13700
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9808348647939092e-05,
      "loss": 3.5537,
      "step": 13800
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9589568565677783e-05,
      "loss": 3.5483,
      "step": 13900
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.937078848341647e-05,
      "loss": 3.4914,
      "step": 14000
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.915200840115516e-05,
      "loss": 3.4733,
      "step": 14100
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.8933228318893847e-05,
      "loss": 3.5044,
      "step": 14200
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8714448236632538e-05,
      "loss": 3.5383,
      "step": 14300
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.8495668154371225e-05,
      "loss": 3.5945,
      "step": 14400
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.8276888072109915e-05,
      "loss": 3.5568,
      "step": 14500
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.8058107989848606e-05,
      "loss": 3.4929,
      "step": 14600
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.7839327907587293e-05,
      "loss": 3.5268,
      "step": 14700
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.7620547825325983e-05,
      "loss": 3.5304,
      "step": 14800
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.7401767743064674e-05,
      "loss": 3.4343,
      "step": 14900
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.718298766080336e-05,
      "loss": 3.5578,
      "step": 15000
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.6964207578542048e-05,
      "loss": 3.4089,
      "step": 15100
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.674542749628074e-05,
      "loss": 3.4965,
      "step": 15200
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.652664741401943e-05,
      "loss": 3.4984,
      "step": 15300
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6307867331758116e-05,
      "loss": 3.5046,
      "step": 15400
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.6089087249496806e-05,
      "loss": 3.5016,
      "step": 15500
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.5870307167235497e-05,
      "loss": 3.5667,
      "step": 15600
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5651527084974184e-05,
      "loss": 3.5435,
      "step": 15700
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.5432747002712874e-05,
      "loss": 3.5086,
      "step": 15800
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.5213966920451563e-05,
      "loss": 3.4831,
      "step": 15900
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.499518683819025e-05,
      "loss": 3.3926,
      "step": 16000
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.4776406755928939e-05,
      "loss": 3.4086,
      "step": 16100
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.455762667366763e-05,
      "loss": 3.4987,
      "step": 16200
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.4338846591406318e-05,
      "loss": 3.4697,
      "step": 16300
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.4120066509145009e-05,
      "loss": 3.5231,
      "step": 16400
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.3901286426883698e-05,
      "loss": 3.4806,
      "step": 16500
    },
    {
      "epoch": 2.18,
      "learning_rate": 1.3682506344622386e-05,
      "loss": 3.5263,
      "step": 16600
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3463726262361077e-05,
      "loss": 3.3846,
      "step": 16700
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.3244946180099765e-05,
      "loss": 3.4415,
      "step": 16800
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.3026166097838454e-05,
      "loss": 3.3833,
      "step": 16900
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.2807386015577141e-05,
      "loss": 3.4666,
      "step": 17000
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.258860593331583e-05,
      "loss": 3.4884,
      "step": 17100
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.236982585105452e-05,
      "loss": 3.3259,
      "step": 17200
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.215104576879321e-05,
      "loss": 3.4231,
      "step": 17300
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.1932265686531898e-05,
      "loss": 3.4166,
      "step": 17400
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.1713485604270589e-05,
      "loss": 3.4279,
      "step": 17500
    },
    {
      "epoch": 2.31,
      "learning_rate": 1.1494705522009276e-05,
      "loss": 3.4174,
      "step": 17600
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1275925439747966e-05,
      "loss": 3.5166,
      "step": 17700
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.1057145357486655e-05,
      "loss": 3.4309,
      "step": 17800
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.0838365275225344e-05,
      "loss": 3.4038,
      "step": 17900
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.0619585192964034e-05,
      "loss": 3.3785,
      "step": 18000
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.0400805110702721e-05,
      "loss": 3.4292,
      "step": 18100
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.0182025028441412e-05,
      "loss": 3.4647,
      "step": 18200
    },
    {
      "epoch": 2.4,
      "learning_rate": 9.9632449461801e-06,
      "loss": 3.4627,
      "step": 18300
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.744464863918789e-06,
      "loss": 3.3596,
      "step": 18400
    },
    {
      "epoch": 2.43,
      "learning_rate": 9.52568478165748e-06,
      "loss": 3.3967,
      "step": 18500
    },
    {
      "epoch": 2.44,
      "learning_rate": 9.306904699396167e-06,
      "loss": 3.4934,
      "step": 18600
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.088124617134857e-06,
      "loss": 3.4232,
      "step": 18700
    },
    {
      "epoch": 2.47,
      "learning_rate": 8.869344534873546e-06,
      "loss": 3.3957,
      "step": 18800
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.650564452612235e-06,
      "loss": 3.396,
      "step": 18900
    },
    {
      "epoch": 2.49,
      "learning_rate": 8.431784370350923e-06,
      "loss": 3.4012,
      "step": 19000
    },
    {
      "epoch": 2.51,
      "learning_rate": 8.213004288089612e-06,
      "loss": 3.3845,
      "step": 19100
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.994224205828303e-06,
      "loss": 3.3873,
      "step": 19200
    },
    {
      "epoch": 2.53,
      "learning_rate": 7.775444123566991e-06,
      "loss": 3.4209,
      "step": 19300
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.55666404130568e-06,
      "loss": 3.3871,
      "step": 19400
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.337883959044369e-06,
      "loss": 3.4716,
      "step": 19500
    },
    {
      "epoch": 2.57,
      "learning_rate": 7.119103876783058e-06,
      "loss": 3.4281,
      "step": 19600
    },
    {
      "epoch": 2.59,
      "learning_rate": 6.900323794521747e-06,
      "loss": 3.3927,
      "step": 19700
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.681543712260437e-06,
      "loss": 3.374,
      "step": 19800
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.462763629999125e-06,
      "loss": 3.3668,
      "step": 19900
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.2439835477378135e-06,
      "loss": 3.3673,
      "step": 20000
    },
    {
      "epoch": 2.64,
      "learning_rate": 6.025203465476503e-06,
      "loss": 3.3633,
      "step": 20100
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.806423383215193e-06,
      "loss": 3.3235,
      "step": 20200
    },
    {
      "epoch": 2.66,
      "learning_rate": 5.5876433009538815e-06,
      "loss": 3.3816,
      "step": 20300
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.36886321869257e-06,
      "loss": 3.3943,
      "step": 20400
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.150083136431259e-06,
      "loss": 3.3534,
      "step": 20500
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.931303054169949e-06,
      "loss": 3.4587,
      "step": 20600
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.7125229719086374e-06,
      "loss": 3.3572,
      "step": 20700
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.493742889647327e-06,
      "loss": 3.3056,
      "step": 20800
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.274962807386016e-06,
      "loss": 3.3853,
      "step": 20900
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.0561827251247046e-06,
      "loss": 3.4141,
      "step": 21000
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.837402642863394e-06,
      "loss": 3.3369,
      "step": 21100
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.618622560602083e-06,
      "loss": 3.366,
      "step": 21200
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.399842478340772e-06,
      "loss": 3.4209,
      "step": 21300
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.181062396079461e-06,
      "loss": 3.3659,
      "step": 21400
    },
    {
      "epoch": 2.82,
      "learning_rate": 2.96228231381815e-06,
      "loss": 3.3432,
      "step": 21500
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.7435022315568393e-06,
      "loss": 3.4514,
      "step": 21600
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.524722149295528e-06,
      "loss": 3.3064,
      "step": 21700
    },
    {
      "epoch": 2.86,
      "learning_rate": 2.3059420670342172e-06,
      "loss": 3.4154,
      "step": 21800
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.0871619847729064e-06,
      "loss": 3.3934,
      "step": 21900
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.8683819025115952e-06,
      "loss": 3.3657,
      "step": 22000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.6496018202502846e-06,
      "loss": 3.3707,
      "step": 22100
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.4308217379889735e-06,
      "loss": 3.3213,
      "step": 22200
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.2120416557276627e-06,
      "loss": 3.351,
      "step": 22300
    },
    {
      "epoch": 2.94,
      "learning_rate": 9.932615734663517e-07,
      "loss": 3.3376,
      "step": 22400
    },
    {
      "epoch": 2.95,
      "learning_rate": 7.744814912050407e-07,
      "loss": 3.2788,
      "step": 22500
    },
    {
      "epoch": 2.97,
      "learning_rate": 5.557014089437298e-07,
      "loss": 3.3773,
      "step": 22600
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.3692132668241884e-07,
      "loss": 3.3724,
      "step": 22700
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.1814124442110792e-07,
      "loss": 3.3345,
      "step": 22800
    },
    {
      "epoch": 3.0,
      "step": 22854,
      "total_flos": 1.2032870585175245e+17,
      "train_loss": 3.9046004770976137,
      "train_runtime": 22926.8282,
      "train_samples_per_second": 19.935,
      "train_steps_per_second": 0.997
    }
  ],
  "max_steps": 22854,
  "num_train_epochs": 3,
  "total_flos": 1.2032870585175245e+17,
  "trial_name": null,
  "trial_params": null
}