{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 785, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032, "grad_norm": 8.14716683209386, "learning_rate": 2.0253164556962026e-06, "loss": 0.8605, "loss_nan_ranks": 0, "loss_rank_avg": 0.21926625072956085, "step": 5, "valid_targets_mean": 2079.4, "valid_targets_min": 286 }, { "epoch": 0.064, "grad_norm": 5.920402356323808, "learning_rate": 4.556962025316456e-06, "loss": 0.8727, "loss_nan_ranks": 0, "loss_rank_avg": 0.19437697529792786, "step": 10, "valid_targets_mean": 1853.0, "valid_targets_min": 412 }, { "epoch": 0.096, "grad_norm": 3.0062336850433558, "learning_rate": 7.08860759493671e-06, "loss": 0.7878, "loss_nan_ranks": 0, "loss_rank_avg": 0.1802225559949875, "step": 15, "valid_targets_mean": 1922.9, "valid_targets_min": 276 }, { "epoch": 0.128, "grad_norm": 1.2909345612294552, "learning_rate": 9.620253164556963e-06, "loss": 0.728, "loss_nan_ranks": 0, "loss_rank_avg": 0.18598613142967224, "step": 20, "valid_targets_mean": 1775.9, "valid_targets_min": 312 }, { "epoch": 0.16, "grad_norm": 1.103229222327004, "learning_rate": 1.2151898734177216e-05, "loss": 0.6844, "loss_nan_ranks": 0, "loss_rank_avg": 0.1875910460948944, "step": 25, "valid_targets_mean": 2530.6, "valid_targets_min": 752 }, { "epoch": 0.192, "grad_norm": 0.6635018774932621, "learning_rate": 1.468354430379747e-05, "loss": 0.6546, "loss_nan_ranks": 0, "loss_rank_avg": 0.1586608588695526, "step": 30, "valid_targets_mean": 2136.8, "valid_targets_min": 271 }, { "epoch": 0.224, "grad_norm": 0.6433009017179456, "learning_rate": 1.7215189873417723e-05, "loss": 0.6335, "loss_nan_ranks": 0, "loss_rank_avg": 0.1259375810623169, "step": 35, "valid_targets_mean": 1263.0, "valid_targets_min": 255 }, { "epoch": 0.256, "grad_norm": 0.51393810893032, "learning_rate": 1.974683544303798e-05, "loss": 0.5936, "loss_nan_ranks": 0, "loss_rank_avg": 0.13195070624351501, "step": 40, "valid_targets_mean": 2070.8, "valid_targets_min": 203 }, { "epoch": 0.288, "grad_norm": 0.45709621074264073, "learning_rate": 2.2278481012658228e-05, "loss": 0.5881, "loss_nan_ranks": 0, "loss_rank_avg": 0.15329253673553467, "step": 45, "valid_targets_mean": 1862.8, "valid_targets_min": 327 }, { "epoch": 0.32, "grad_norm": 0.4244232378630844, "learning_rate": 2.481012658227848e-05, "loss": 0.5533, "loss_nan_ranks": 0, "loss_rank_avg": 0.1363927721977234, "step": 50, "valid_targets_mean": 1982.9, "valid_targets_min": 309 }, { "epoch": 0.352, "grad_norm": 0.4139880007535703, "learning_rate": 2.7341772151898737e-05, "loss": 0.5573, "loss_nan_ranks": 0, "loss_rank_avg": 0.09996577352285385, "step": 55, "valid_targets_mean": 1315.6, "valid_targets_min": 237 }, { "epoch": 0.384, "grad_norm": 0.37429661394381025, "learning_rate": 2.987341772151899e-05, "loss": 0.5407, "loss_nan_ranks": 0, "loss_rank_avg": 0.14283347129821777, "step": 60, "valid_targets_mean": 2382.9, "valid_targets_min": 500 }, { "epoch": 0.416, "grad_norm": 0.36737620523128234, "learning_rate": 3.240506329113924e-05, "loss": 0.5355, "loss_nan_ranks": 0, "loss_rank_avg": 0.1327112913131714, "step": 65, "valid_targets_mean": 2425.0, "valid_targets_min": 390 }, { "epoch": 0.448, "grad_norm": 0.3753920465218165, "learning_rate": 3.49367088607595e-05, "loss": 0.5103, "loss_nan_ranks": 0, "loss_rank_avg": 0.11590541899204254, "step": 70, "valid_targets_mean": 2080.2, "valid_targets_min": 324 }, { "epoch": 0.48, "grad_norm": 0.3940828492197012, "learning_rate": 3.746835443037975e-05, "loss": 0.4911, "loss_nan_ranks": 0, "loss_rank_avg": 0.11118647456169128, "step": 75, "valid_targets_mean": 1946.9, "valid_targets_min": 304 }, { "epoch": 0.512, "grad_norm": 0.423572144780963, "learning_rate": 4e-05, "loss": 0.5068, "loss_nan_ranks": 0, "loss_rank_avg": 0.15440745651721954, "step": 80, "valid_targets_mean": 2268.2, "valid_targets_min": 241 }, { "epoch": 0.544, "grad_norm": 0.38629005268705274, "learning_rate": 3.999504991751045e-05, "loss": 0.5058, "loss_nan_ranks": 0, "loss_rank_avg": 0.14010299742221832, "step": 85, "valid_targets_mean": 2403.1, "valid_targets_min": 235 }, { "epoch": 0.576, "grad_norm": 0.3653540835536824, "learning_rate": 3.9980202120373464e-05, "loss": 0.4937, "loss_nan_ranks": 0, "loss_rank_avg": 0.11356395483016968, "step": 90, "valid_targets_mean": 2140.4, "valid_targets_min": 363 }, { "epoch": 0.608, "grad_norm": 0.3885775064170145, "learning_rate": 3.995546395837111e-05, "loss": 0.4935, "loss_nan_ranks": 0, "loss_rank_avg": 0.11334758996963501, "step": 95, "valid_targets_mean": 1886.6, "valid_targets_min": 316 }, { "epoch": 0.64, "grad_norm": 0.4105257676508864, "learning_rate": 3.992084767709763e-05, "loss": 0.4818, "loss_nan_ranks": 0, "loss_rank_avg": 0.1429881751537323, "step": 100, "valid_targets_mean": 2493.0, "valid_targets_min": 398 }, { "epoch": 0.672, "grad_norm": 0.43132900302069627, "learning_rate": 3.987637041189781e-05, "loss": 0.4761, "loss_nan_ranks": 0, "loss_rank_avg": 0.12061458826065063, "step": 105, "valid_targets_mean": 1932.0, "valid_targets_min": 295 }, { "epoch": 0.704, "grad_norm": 0.3900655075347954, "learning_rate": 3.982205417938482e-05, "loss": 0.4757, "loss_nan_ranks": 0, "loss_rank_avg": 0.10593143105506897, "step": 110, "valid_targets_mean": 1720.4, "valid_targets_min": 262 }, { "epoch": 0.736, "grad_norm": 0.3585316531700683, "learning_rate": 3.975792586654179e-05, "loss": 0.469, "loss_nan_ranks": 0, "loss_rank_avg": 0.10749523341655731, "step": 115, "valid_targets_mean": 1945.8, "valid_targets_min": 359 }, { "epoch": 0.768, "grad_norm": 0.43567911679608934, "learning_rate": 3.968401721741259e-05, "loss": 0.4695, "loss_nan_ranks": 0, "loss_rank_avg": 0.10033411532640457, "step": 120, "valid_targets_mean": 1561.8, "valid_targets_min": 422 }, { "epoch": 0.8, "grad_norm": 0.35672280853176086, "learning_rate": 3.960036481738819e-05, "loss": 0.4668, "loss_nan_ranks": 0, "loss_rank_avg": 0.08036337792873383, "step": 125, "valid_targets_mean": 1859.0, "valid_targets_min": 296 }, { "epoch": 0.832, "grad_norm": 0.424096009768031, "learning_rate": 3.950701007509667e-05, "loss": 0.4663, "loss_nan_ranks": 0, "loss_rank_avg": 0.11247564852237701, "step": 130, "valid_targets_mean": 1561.5, "valid_targets_min": 311 }, { "epoch": 0.864, "grad_norm": 0.41273109498551225, "learning_rate": 3.940399920190552e-05, "loss": 0.4796, "loss_nan_ranks": 0, "loss_rank_avg": 0.10375432670116425, "step": 135, "valid_targets_mean": 1782.9, "valid_targets_min": 376 }, { "epoch": 0.896, "grad_norm": 0.40183143471459065, "learning_rate": 3.92913831890467e-05, "loss": 0.4708, "loss_nan_ranks": 0, "loss_rank_avg": 0.12724314630031586, "step": 140, "valid_targets_mean": 2133.4, "valid_targets_min": 315 }, { "epoch": 0.928, "grad_norm": 0.3726564639586119, "learning_rate": 3.916921778237556e-05, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.11453776806592941, "step": 145, "valid_targets_mean": 1996.9, "valid_targets_min": 298 }, { "epoch": 0.96, "grad_norm": 0.4057662458741948, "learning_rate": 3.903756345477612e-05, "loss": 0.47, "loss_nan_ranks": 0, "loss_rank_avg": 0.12043265253305435, "step": 150, "valid_targets_mean": 2017.5, "valid_targets_min": 293 }, { "epoch": 0.992, "grad_norm": 0.3818040973232193, "learning_rate": 3.889648537622657e-05, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.111931711435318, "step": 155, "valid_targets_mean": 1964.6, "valid_targets_min": 356 }, { "epoch": 1.0192, "grad_norm": 0.3703962708616199, "learning_rate": 3.874605338153952e-05, "loss": 0.4504, "loss_nan_ranks": 0, "loss_rank_avg": 0.13583062589168549, "step": 160, "valid_targets_mean": 2906.2, "valid_targets_min": 298 }, { "epoch": 1.0512, "grad_norm": 0.3642845207462427, "learning_rate": 3.8586341935793265e-05, "loss": 0.4466, "loss_nan_ranks": 0, "loss_rank_avg": 0.0990055650472641, "step": 165, "valid_targets_mean": 1836.2, "valid_targets_min": 443 }, { "epoch": 1.0832, "grad_norm": 0.41003526617941, "learning_rate": 3.841743009747089e-05, "loss": 0.4539, "loss_nan_ranks": 0, "loss_rank_avg": 0.10039589554071426, "step": 170, "valid_targets_mean": 1632.0, "valid_targets_min": 372 }, { "epoch": 1.1152, "grad_norm": 0.3712919063822357, "learning_rate": 3.8239401479325714e-05, "loss": 0.4449, "loss_nan_ranks": 0, "loss_rank_avg": 0.0898459255695343, "step": 175, "valid_targets_mean": 2068.0, "valid_targets_min": 280 }, { "epoch": 1.1472, "grad_norm": 0.3934150558873887, "learning_rate": 3.8052344206992276e-05, "loss": 0.4476, "loss_nan_ranks": 0, "loss_rank_avg": 0.11210045218467712, "step": 180, "valid_targets_mean": 2007.4, "valid_targets_min": 416 }, { "epoch": 1.1792, "grad_norm": 0.37665544250528954, "learning_rate": 3.7856350875363396e-05, "loss": 0.4586, "loss_nan_ranks": 0, "loss_rank_avg": 0.1279641091823578, "step": 185, "valid_targets_mean": 2534.1, "valid_targets_min": 361 }, { "epoch": 1.2112, "grad_norm": 0.41690469322332946, "learning_rate": 3.765151850275497e-05, "loss": 0.4511, "loss_nan_ranks": 0, "loss_rank_avg": 0.09716811776161194, "step": 190, "valid_targets_mean": 1532.4, "valid_targets_min": 295 }, { "epoch": 1.2432, "grad_norm": 0.44530184711328274, "learning_rate": 3.7437948482881104e-05, "loss": 0.4377, "loss_nan_ranks": 0, "loss_rank_avg": 0.09476161003112793, "step": 195, "valid_targets_mean": 1602.2, "valid_targets_min": 336 }, { "epoch": 1.2752, "grad_norm": 0.37960789961345176, "learning_rate": 3.721574653466336e-05, "loss": 0.4392, "loss_nan_ranks": 0, "loss_rank_avg": 0.09985041618347168, "step": 200, "valid_targets_mean": 1641.4, "valid_targets_min": 265 }, { "epoch": 1.3072, "grad_norm": 0.37938712023713544, "learning_rate": 3.698502264989903e-05, "loss": 0.4538, "loss_nan_ranks": 0, "loss_rank_avg": 0.11186648160219193, "step": 205, "valid_targets_mean": 2174.2, "valid_targets_min": 351 }, { "epoch": 1.3392, "grad_norm": 0.3547756314318588, "learning_rate": 3.674589103881432e-05, "loss": 0.4404, "loss_nan_ranks": 0, "loss_rank_avg": 0.09262645244598389, "step": 210, "valid_targets_mean": 1916.3, "valid_targets_min": 401 }, { "epoch": 1.3712, "grad_norm": 0.3878066206758719, "learning_rate": 3.64984700735293e-05, "loss": 0.4452, "loss_nan_ranks": 0, "loss_rank_avg": 0.10997898131608963, "step": 215, "valid_targets_mean": 1802.9, "valid_targets_min": 374 }, { "epoch": 1.4032, "grad_norm": 0.3804522983794246, "learning_rate": 3.624288222946273e-05, "loss": 0.4439, "loss_nan_ranks": 0, "loss_rank_avg": 0.10874274373054504, "step": 220, "valid_targets_mean": 2008.3, "valid_targets_min": 256 }, { "epoch": 1.4352, "grad_norm": 0.41414921165828417, "learning_rate": 3.597925402470578e-05, "loss": 0.4397, "loss_nan_ranks": 0, "loss_rank_avg": 0.1123582273721695, "step": 225, "valid_targets_mean": 1910.8, "valid_targets_min": 328 }, { "epoch": 1.4672, "grad_norm": 0.3797185289332039, "learning_rate": 3.570771595739445e-05, "loss": 0.4451, "loss_nan_ranks": 0, "loss_rank_avg": 0.11866221576929092, "step": 230, "valid_targets_mean": 2292.9, "valid_targets_min": 303 }, { "epoch": 1.4992, "grad_norm": 0.42425280525062664, "learning_rate": 3.5428402441111964e-05, "loss": 0.4459, "loss_nan_ranks": 0, "loss_rank_avg": 0.09500952064990997, "step": 235, "valid_targets_mean": 1600.9, "valid_targets_min": 250 }, { "epoch": 1.5312000000000001, "grad_norm": 0.393496198093285, "learning_rate": 3.5141451738352936e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.0935092568397522, "step": 240, "valid_targets_mean": 1732.9, "valid_targets_min": 309 }, { "epoch": 1.5632000000000001, "grad_norm": 0.3738047088377615, "learning_rate": 3.4847005892082266e-05, "loss": 0.44, "loss_nan_ranks": 0, "loss_rank_avg": 0.1055334210395813, "step": 245, "valid_targets_mean": 2109.4, "valid_targets_min": 317 }, { "epoch": 1.5952, "grad_norm": 0.36004468489098, "learning_rate": 3.454521065542273e-05, "loss": 0.4432, "loss_nan_ranks": 0, "loss_rank_avg": 0.09839586913585663, "step": 250, "valid_targets_mean": 2300.9, "valid_targets_min": 302 }, { "epoch": 1.6272, "grad_norm": 0.37942976494157027, "learning_rate": 3.423621541950597e-05, "loss": 0.437, "loss_nan_ranks": 0, "loss_rank_avg": 0.12363630533218384, "step": 255, "valid_targets_mean": 2528.2, "valid_targets_min": 335 }, { "epoch": 1.6592, "grad_norm": 0.3891071271138682, "learning_rate": 3.3920173139522664e-05, "loss": 0.4312, "loss_nan_ranks": 0, "loss_rank_avg": 0.07246629148721695, "step": 260, "valid_targets_mean": 1333.9, "valid_targets_min": 319 }, { "epoch": 1.6912, "grad_norm": 0.39875855724956927, "learning_rate": 3.35972402590084e-05, "loss": 0.4297, "loss_nan_ranks": 0, "loss_rank_avg": 0.09168781340122223, "step": 265, "valid_targets_mean": 1581.8, "valid_targets_min": 318 }, { "epoch": 1.7231999999999998, "grad_norm": 0.3769289197698518, "learning_rate": 3.326757663240291e-05, "loss": 0.4352, "loss_nan_ranks": 0, "loss_rank_avg": 0.130869060754776, "step": 270, "valid_targets_mean": 3350.1, "valid_targets_min": 506 }, { "epoch": 1.7551999999999999, "grad_norm": 0.3863770231913207, "learning_rate": 3.293134544592073e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.10226617753505707, "step": 275, "valid_targets_mean": 1733.9, "valid_targets_min": 282 }, { "epoch": 1.7872, "grad_norm": 0.3654977150425626, "learning_rate": 3.258871313677274e-05, "loss": 0.4358, "loss_nan_ranks": 0, "loss_rank_avg": 0.1077582985162735, "step": 280, "valid_targets_mean": 1944.8, "valid_targets_min": 373 }, { "epoch": 1.8192, "grad_norm": 0.3511862081472535, "learning_rate": 3.2239849310778316e-05, "loss": 0.438, "loss_nan_ranks": 0, "loss_rank_avg": 0.07879994809627533, "step": 285, "valid_targets_mean": 1390.5, "valid_targets_min": 274 }, { "epoch": 1.8512, "grad_norm": 0.4243989961030966, "learning_rate": 3.188492665840909e-05, "loss": 0.4348, "loss_nan_ranks": 0, "loss_rank_avg": 0.09362339973449707, "step": 290, "valid_targets_mean": 1745.0, "valid_targets_min": 391 }, { "epoch": 1.8832, "grad_norm": 0.36259010129352315, "learning_rate": 3.1524120869305726e-05, "loss": 0.4305, "loss_nan_ranks": 0, "loss_rank_avg": 0.08184528350830078, "step": 295, "valid_targets_mean": 1940.2, "valid_targets_min": 387 }, { "epoch": 1.9152, "grad_norm": 0.37769494018077754, "learning_rate": 3.11576105453101e-05, "loss": 0.4305, "loss_nan_ranks": 0, "loss_rank_avg": 0.10083824396133423, "step": 300, "valid_targets_mean": 1933.9, "valid_targets_min": 288 }, { "epoch": 1.9472, "grad_norm": 0.3707943711792651, "learning_rate": 3.0785577112055916e-05, "loss": 0.4315, "loss_nan_ranks": 0, "loss_rank_avg": 0.13190993666648865, "step": 305, "valid_targets_mean": 2406.9, "valid_targets_min": 346 }, { "epoch": 1.9792, "grad_norm": 0.3971883899874949, "learning_rate": 3.040820472916153e-05, "loss": 0.4279, "loss_nan_ranks": 0, "loss_rank_avg": 0.10543644428253174, "step": 310, "valid_targets_mean": 1951.4, "valid_targets_min": 326 }, { "epoch": 2.0064, "grad_norm": 0.3924708322471276, "learning_rate": 3.002568019906939e-05, "loss": 0.4169, "loss_nan_ranks": 0, "loss_rank_avg": 0.09858863800764084, "step": 315, "valid_targets_mean": 2251.6, "valid_targets_min": 504 }, { "epoch": 2.0384, "grad_norm": 0.3571864594713526, "learning_rate": 2.963819287457733e-05, "loss": 0.4226, "loss_nan_ranks": 0, "loss_rank_avg": 0.09932637959718704, "step": 320, "valid_targets_mean": 2090.6, "valid_targets_min": 326 }, { "epoch": 2.0704, "grad_norm": 0.40961777644084196, "learning_rate": 2.924593456510733e-05, "loss": 0.4135, "loss_nan_ranks": 0, "loss_rank_avg": 0.09342212975025177, "step": 325, "valid_targets_mean": 1637.6, "valid_targets_min": 254 }, { "epoch": 2.1024, "grad_norm": 0.3815641523611149, "learning_rate": 2.8849099441758306e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.0778791606426239, "step": 330, "valid_targets_mean": 1502.4, "valid_targets_min": 416 }, { "epoch": 2.1344, "grad_norm": 0.3732479385561735, "learning_rate": 2.844788394118979e-05, "loss": 0.4145, "loss_nan_ranks": 0, "loss_rank_avg": 0.1075972318649292, "step": 335, "valid_targets_mean": 1884.4, "valid_targets_min": 410 }, { "epoch": 2.1664, "grad_norm": 0.39979959993168973, "learning_rate": 2.8042486668384164e-05, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.1028364896774292, "step": 340, "valid_targets_mean": 1881.4, "valid_targets_min": 300 }, { "epoch": 2.1984, "grad_norm": 0.3436480691156955, "learning_rate": 2.7633108298335582e-05, "loss": 0.4188, "loss_nan_ranks": 0, "loss_rank_avg": 0.09203699231147766, "step": 345, "valid_targets_mean": 2002.6, "valid_targets_min": 274 }, { "epoch": 2.2304, "grad_norm": 0.3637947305985978, "learning_rate": 2.721995147671416e-05, "loss": 0.4066, "loss_nan_ranks": 0, "loss_rank_avg": 0.0962645411491394, "step": 350, "valid_targets_mean": 1920.9, "valid_targets_min": 352 }, { "epoch": 2.2624, "grad_norm": 0.3804404229247085, "learning_rate": 2.68032207195547e-05, "loss": 0.4216, "loss_nan_ranks": 0, "loss_rank_avg": 0.10509411990642548, "step": 355, "valid_targets_mean": 1938.3, "valid_targets_min": 242 }, { "epoch": 2.2944, "grad_norm": 0.36736829313156527, "learning_rate": 2.6383122312019604e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.10104438662528992, "step": 360, "valid_targets_mean": 2123.1, "valid_targets_min": 521 }, { "epoch": 2.3264, "grad_norm": 0.410927585160652, "learning_rate": 2.595986420628597e-05, "loss": 0.4138, "loss_nan_ranks": 0, "loss_rank_avg": 0.11637736111879349, "step": 365, "valid_targets_mean": 2104.9, "valid_targets_min": 355 }, { "epoch": 2.3584, "grad_norm": 0.36942595692393504, "learning_rate": 2.5533655918607573e-05, "loss": 0.4049, "loss_nan_ranks": 0, "loss_rank_avg": 0.09893950074911118, "step": 370, "valid_targets_mean": 1899.0, "valid_targets_min": 314 }, { "epoch": 2.3904, "grad_norm": 0.3991278234894574, "learning_rate": 2.510470842560259e-05, "loss": 0.4158, "loss_nan_ranks": 0, "loss_rank_avg": 0.09461153298616409, "step": 375, "valid_targets_mean": 2086.9, "valid_targets_min": 317 }, { "epoch": 2.4224, "grad_norm": 0.42884196450650014, "learning_rate": 2.467323405981841e-05, "loss": 0.422, "loss_nan_ranks": 0, "loss_rank_avg": 0.10895151644945145, "step": 380, "valid_targets_mean": 1752.6, "valid_targets_min": 437 }, { "epoch": 2.4544, "grad_norm": 0.36127559539249393, "learning_rate": 2.423944640462533e-05, "loss": 0.4167, "loss_nan_ranks": 0, "loss_rank_avg": 0.09095238149166107, "step": 385, "valid_targets_mean": 1855.2, "valid_targets_min": 492 }, { "epoch": 2.4864, "grad_norm": 0.3707773799453282, "learning_rate": 2.3803560188490968e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.09982647746801376, "step": 390, "valid_targets_mean": 1836.3, "valid_targets_min": 280 }, { "epoch": 2.5183999999999997, "grad_norm": 0.3947300871587244, "learning_rate": 2.336579117868789e-05, "loss": 0.4174, "loss_nan_ranks": 0, "loss_rank_avg": 0.08322621881961823, "step": 395, "valid_targets_mean": 1569.6, "valid_targets_min": 297 }, { "epoch": 2.5504, "grad_norm": 0.3635566015654518, "learning_rate": 2.292635607448711e-05, "loss": 0.4149, "loss_nan_ranks": 0, "loss_rank_avg": 0.07755577564239502, "step": 400, "valid_targets_mean": 1840.1, "valid_targets_min": 446 }, { "epoch": 2.5824, "grad_norm": 0.3658734403179243, "learning_rate": 2.248547239989008e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.10649602860212326, "step": 405, "valid_targets_mean": 2042.0, "valid_targets_min": 398 }, { "epoch": 2.6144, "grad_norm": 0.39300772886108815, "learning_rate": 2.204335839595255e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.0882931649684906, "step": 410, "valid_targets_mean": 1944.7, "valid_targets_min": 282 }, { "epoch": 2.6464, "grad_norm": 0.36160891144992, "learning_rate": 2.1600232912753452e-05, "loss": 0.4096, "loss_nan_ranks": 0, "loss_rank_avg": 0.11347469687461853, "step": 415, "valid_targets_mean": 2160.9, "valid_targets_min": 595 }, { "epoch": 2.6784, "grad_norm": 0.41551786205605684, "learning_rate": 2.1156315301062293e-05, "loss": 0.4223, "loss_nan_ranks": 0, "loss_rank_avg": 0.11216609179973602, "step": 420, "valid_targets_mean": 2079.2, "valid_targets_min": 254 }, { "epoch": 2.7104, "grad_norm": 0.41396235962664973, "learning_rate": 2.0711825303758712e-05, "loss": 0.4158, "loss_nan_ranks": 0, "loss_rank_avg": 0.14466342329978943, "step": 425, "valid_targets_mean": 2259.5, "valid_targets_min": 296 }, { "epoch": 2.7424, "grad_norm": 0.34162915412351563, "learning_rate": 2.0266982947057962e-05, "loss": 0.4085, "loss_nan_ranks": 0, "loss_rank_avg": 0.08012251555919647, "step": 430, "valid_targets_mean": 2034.8, "valid_targets_min": 281 }, { "epoch": 2.7744, "grad_norm": 0.3563276993886399, "learning_rate": 1.9822008431596083e-05, "loss": 0.4232, "loss_nan_ranks": 0, "loss_rank_avg": 0.10618533939123154, "step": 435, "valid_targets_mean": 2037.2, "valid_targets_min": 322 }, { "epoch": 2.8064, "grad_norm": 0.38084555075678805, "learning_rate": 1.937712202342881e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.1141984686255455, "step": 440, "valid_targets_mean": 2340.9, "valid_targets_min": 324 }, { "epoch": 2.8384, "grad_norm": 0.3747618895864175, "learning_rate": 1.8932543944998037e-05, "loss": 0.4239, "loss_nan_ranks": 0, "loss_rank_avg": 0.11149945110082626, "step": 445, "valid_targets_mean": 2282.6, "valid_targets_min": 301 }, { "epoch": 2.8704, "grad_norm": 0.36122028652603694, "learning_rate": 1.8488494266119877e-05, "loss": 0.4257, "loss_nan_ranks": 0, "loss_rank_avg": 0.10546134412288666, "step": 450, "valid_targets_mean": 1946.1, "valid_targets_min": 250 }, { "epoch": 2.9024, "grad_norm": 0.3861093775015299, "learning_rate": 1.804519279504834e-05, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.09503986686468124, "step": 455, "valid_targets_mean": 1542.2, "valid_targets_min": 360 }, { "epoch": 2.9344, "grad_norm": 0.39801392048889755, "learning_rate": 1.7602858969668365e-05, "loss": 0.4114, "loss_nan_ranks": 0, "loss_rank_avg": 0.12231907248497009, "step": 460, "valid_targets_mean": 2036.3, "valid_targets_min": 371 }, { "epoch": 2.9664, "grad_norm": 0.3526144818299693, "learning_rate": 1.716171174887231e-05, "loss": 0.4189, "loss_nan_ranks": 0, "loss_rank_avg": 0.1124625951051712, "step": 465, "valid_targets_mean": 2496.9, "valid_targets_min": 718 }, { "epoch": 2.9984, "grad_norm": 0.3570444775783306, "learning_rate": 1.6721969504173484e-05, "loss": 0.4076, "loss_nan_ranks": 0, "loss_rank_avg": 0.11433712393045425, "step": 470, "valid_targets_mean": 2343.1, "valid_targets_min": 285 }, { "epoch": 3.0256, "grad_norm": 0.3782860045128177, "learning_rate": 1.628384991161041e-05, "loss": 0.3971, "loss_nan_ranks": 0, "loss_rank_avg": 0.07901632785797119, "step": 475, "valid_targets_mean": 1488.1, "valid_targets_min": 303 }, { "epoch": 3.0576, "grad_norm": 0.4111319002981645, "learning_rate": 1.5847569843995452e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.12275226414203644, "step": 480, "valid_targets_mean": 1940.6, "valid_targets_min": 386 }, { "epoch": 3.0896, "grad_norm": 0.36391313472494724, "learning_rate": 1.5413345263560922e-05, "loss": 0.3998, "loss_nan_ranks": 0, "loss_rank_avg": 0.11102934926748276, "step": 485, "valid_targets_mean": 2418.6, "valid_targets_min": 368 }, { "epoch": 3.1216, "grad_norm": 0.3856519763677011, "learning_rate": 1.4981391115056032e-05, "loss": 0.4063, "loss_nan_ranks": 0, "loss_rank_avg": 0.12251491099596024, "step": 490, "valid_targets_mean": 2434.8, "valid_targets_min": 370 }, { "epoch": 3.1536, "grad_norm": 0.37348077007208647, "learning_rate": 1.455192121934748e-05, "loss": 0.4045, "loss_nan_ranks": 0, "loss_rank_avg": 0.089664027094841, "step": 495, "valid_targets_mean": 1768.3, "valid_targets_min": 314 }, { "epoch": 3.1856, "grad_norm": 0.38847455569271166, "learning_rate": 1.4125148167576303e-05, "loss": 0.4133, "loss_nan_ranks": 0, "loss_rank_avg": 0.09874036908149719, "step": 500, "valid_targets_mean": 1779.1, "valid_targets_min": 306 }, { "epoch": 3.2176, "grad_norm": 0.37565771412703225, "learning_rate": 1.3701283215923563e-05, "loss": 0.4047, "loss_nan_ranks": 0, "loss_rank_avg": 0.10693678259849548, "step": 505, "valid_targets_mean": 2227.6, "valid_targets_min": 300 }, { "epoch": 3.2496, "grad_norm": 0.3405599163913066, "learning_rate": 1.328053618103677e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.07875536382198334, "step": 510, "valid_targets_mean": 1762.2, "valid_targets_min": 235 }, { "epoch": 3.2816, "grad_norm": 0.35645111497443527, "learning_rate": 1.2863115336168916e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.09105764329433441, "step": 515, "valid_targets_mean": 1974.8, "valid_targets_min": 362 }, { "epoch": 3.3136, "grad_norm": 0.3784886038562368, "learning_rate": 1.2449227308081509e-05, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.10320840775966644, "step": 520, "valid_targets_mean": 2112.4, "valid_targets_min": 247 }, { "epoch": 3.3456, "grad_norm": 0.3565181056693481, "learning_rate": 1.2039076974762587e-05, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.06601577997207642, "step": 525, "valid_targets_mean": 1137.8, "valid_targets_min": 273 }, { "epoch": 3.3776, "grad_norm": 0.3878554033796266, "learning_rate": 1.163286736401044e-05, "loss": 0.4116, "loss_nan_ranks": 0, "loss_rank_avg": 0.12445764988660812, "step": 530, "valid_targets_mean": 2428.8, "valid_targets_min": 323 }, { "epoch": 3.4096, "grad_norm": 0.3793731856250669, "learning_rate": 1.123079955293322e-05, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.09462922811508179, "step": 535, "valid_targets_mean": 1729.9, "valid_targets_min": 271 }, { "epoch": 3.4416, "grad_norm": 0.4042644798725892, "learning_rate": 1.0833072568414037e-05, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.10817129909992218, "step": 540, "valid_targets_mean": 1938.6, "valid_targets_min": 284 }, { "epoch": 3.4736000000000002, "grad_norm": 0.3481176250844818, "learning_rate": 1.0439883288591057e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.07519026100635529, "step": 545, "valid_targets_mean": 1742.8, "valid_targets_min": 286 }, { "epoch": 3.5056000000000003, "grad_norm": 0.34662221229569934, "learning_rate": 1.0051426345401202e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.103794626891613, "step": 550, "valid_targets_mean": 2505.3, "valid_targets_min": 445 }, { "epoch": 3.5376, "grad_norm": 0.36409246240396004, "learning_rate": 9.667894028235704e-06, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.11506909132003784, "step": 555, "valid_targets_mean": 2698.3, "valid_targets_min": 377 }, { "epoch": 3.5696, "grad_norm": 0.3599901892947728, "learning_rate": 9.289476188755315e-06, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.11729270219802856, "step": 560, "valid_targets_mean": 2483.1, "valid_targets_min": 350 }, { "epoch": 3.6016, "grad_norm": 0.41221244321821643, "learning_rate": 8.916360146912122e-06, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.11008159816265106, "step": 565, "valid_targets_mean": 1742.4, "valid_targets_min": 294 }, { "epoch": 3.6336, "grad_norm": 0.38579769472917613, "learning_rate": 8.548730598224646e-06, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.10939182341098785, "step": 570, "valid_targets_mean": 1960.1, "valid_targets_min": 298 }, { "epoch": 3.6656, "grad_norm": 0.34701920283352866, "learning_rate": 8.186769522352053e-06, "loss": 0.3852, "loss_nan_ranks": 0, "loss_rank_avg": 0.08542507141828537, "step": 575, "valid_targets_mean": 2168.1, "valid_targets_min": 313 }, { "epoch": 3.6976, "grad_norm": 0.35591046641387736, "learning_rate": 7.830656093012714e-06, "loss": 0.3976, "loss_nan_ranks": 0, "loss_rank_avg": 0.08797971904277802, "step": 580, "valid_targets_mean": 1935.6, "valid_targets_min": 260 }, { "epoch": 3.7296, "grad_norm": 0.36554391070875364, "learning_rate": 7.480566589291696e-06, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.08623679727315903, "step": 585, "valid_targets_mean": 1880.6, "valid_targets_min": 237 }, { "epoch": 3.7616, "grad_norm": 0.3425011546047973, "learning_rate": 7.1366743083812285e-06, "loss": 0.3927, "loss_nan_ranks": 0, "loss_rank_avg": 0.09836140275001526, "step": 590, "valid_targets_mean": 2424.9, "valid_targets_min": 442 }, { "epoch": 3.7936, "grad_norm": 0.3765252614338545, "learning_rate": 6.799149479797101e-06, "loss": 0.4054, "loss_nan_ranks": 0, "loss_rank_avg": 0.08145788311958313, "step": 595, "valid_targets_mean": 1425.4, "valid_targets_min": 336 }, { "epoch": 3.8256, "grad_norm": 0.3824768824838364, "learning_rate": 6.4681591811137e-06, "loss": 0.3969, "loss_nan_ranks": 0, "loss_rank_avg": 0.09653936326503754, "step": 600, "valid_targets_mean": 1699.6, "valid_targets_min": 254 }, { "epoch": 3.8576, "grad_norm": 0.36500231491269797, "learning_rate": 6.143867255259197e-06, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09021585434675217, "step": 605, "valid_targets_mean": 1811.6, "valid_targets_min": 260 }, { "epoch": 3.8895999999999997, "grad_norm": 0.3658932400992029, "learning_rate": 5.8264342294119504e-06, "loss": 0.4046, "loss_nan_ranks": 0, "loss_rank_avg": 0.08748981356620789, "step": 610, "valid_targets_mean": 2117.2, "valid_targets_min": 453 }, { "epoch": 3.9215999999999998, "grad_norm": 0.3575561480247379, "learning_rate": 5.516017235538258e-06, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.10731581598520279, "step": 615, "valid_targets_mean": 2621.9, "valid_targets_min": 351 }, { "epoch": 3.9536, "grad_norm": 0.3860612566250885, "learning_rate": 5.212769932610695e-06, "loss": 0.4033, "loss_nan_ranks": 0, "loss_rank_avg": 0.1335444152355194, "step": 620, "valid_targets_mean": 2516.4, "valid_targets_min": 332 }, { "epoch": 3.9856, "grad_norm": 0.37745456277448003, "learning_rate": 4.916842430545681e-06, "loss": 0.4117, "loss_nan_ranks": 0, "loss_rank_avg": 0.10873982310295105, "step": 625, "valid_targets_mean": 2249.4, "valid_targets_min": 462 }, { "epoch": 4.0128, "grad_norm": 0.3751320797697729, "learning_rate": 4.628381215897837e-06, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.10071326792240143, "step": 630, "valid_targets_mean": 1787.6, "valid_targets_min": 333 }, { "epoch": 4.0448, "grad_norm": 0.3647280059130847, "learning_rate": 4.347529079347914e-06, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.10397302359342575, "step": 635, "valid_targets_mean": 1827.6, "valid_targets_min": 264 }, { "epoch": 4.0768, "grad_norm": 0.38434075786348443, "learning_rate": 4.074425045020247e-06, "loss": 0.4044, "loss_nan_ranks": 0, "loss_rank_avg": 0.10799585282802582, "step": 640, "valid_targets_mean": 1992.3, "valid_targets_min": 393 }, { "epoch": 4.1088, "grad_norm": 0.3616337873434501, "learning_rate": 3.8092043016646487e-06, "loss": 0.3924, "loss_nan_ranks": 0, "loss_rank_avg": 0.0747368261218071, "step": 645, "valid_targets_mean": 1762.6, "valid_targets_min": 271 }, { "epoch": 4.1408, "grad_norm": 0.33582554421154587, "learning_rate": 3.551998135736867e-06, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.09978146106004715, "step": 650, "valid_targets_mean": 2327.4, "valid_targets_min": 480 }, { "epoch": 4.1728, "grad_norm": 0.3586817912169902, "learning_rate": 3.3029338664107267e-06, "loss": 0.3925, "loss_nan_ranks": 0, "loss_rank_avg": 0.1346765011548996, "step": 655, "valid_targets_mean": 2910.4, "valid_targets_min": 504 }, { "epoch": 4.2048, "grad_norm": 0.4139262451971569, "learning_rate": 3.0621347825540625e-06, "loss": 0.3944, "loss_nan_ranks": 0, "loss_rank_avg": 0.08001936227083206, "step": 660, "valid_targets_mean": 1368.4, "valid_targets_min": 396 }, { "epoch": 4.2368, "grad_norm": 0.4115225371782235, "learning_rate": 2.8297200816997183e-06, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.1048380583524704, "step": 665, "valid_targets_mean": 1863.4, "valid_targets_min": 488 }, { "epoch": 4.2688, "grad_norm": 0.3443567441515153, "learning_rate": 2.605804811041803e-06, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.0933392196893692, "step": 670, "valid_targets_mean": 1963.9, "valid_targets_min": 340 }, { "epoch": 4.3008, "grad_norm": 0.39207406274054746, "learning_rate": 2.390499810486351e-06, "loss": 0.4051, "loss_nan_ranks": 0, "loss_rank_avg": 0.107335664331913, "step": 675, "valid_targets_mean": 1951.6, "valid_targets_min": 330 }, { "epoch": 4.3328, "grad_norm": 0.3831511138299672, "learning_rate": 2.183911657784685e-06, "loss": 0.3932, "loss_nan_ranks": 0, "loss_rank_avg": 0.09761779755353928, "step": 680, "valid_targets_mean": 1720.6, "valid_targets_min": 244 }, { "epoch": 4.3648, "grad_norm": 0.33067368701100003, "learning_rate": 1.986142615776532e-06, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.12047050893306732, "step": 685, "valid_targets_mean": 2696.1, "valid_targets_min": 315 }, { "epoch": 4.3968, "grad_norm": 0.34558136798716776, "learning_rate": 1.7972905817690644e-06, "loss": 0.3972, "loss_nan_ranks": 0, "loss_rank_avg": 0.11677656322717667, "step": 690, "valid_targets_mean": 2567.2, "valid_targets_min": 395 }, { "epoch": 4.4288, "grad_norm": 0.37835131228673047, "learning_rate": 1.617449039076955e-06, "loss": 0.3951, "loss_nan_ranks": 0, "loss_rank_avg": 0.09341083467006683, "step": 695, "valid_targets_mean": 1737.2, "valid_targets_min": 256 }, { "epoch": 4.4608, "grad_norm": 0.3690394775263748, "learning_rate": 1.4467070107473413e-06, "loss": 0.3924, "loss_nan_ranks": 0, "loss_rank_avg": 0.10264524072408676, "step": 700, "valid_targets_mean": 1922.0, "valid_targets_min": 327 }, { "epoch": 4.4928, "grad_norm": 0.3624189470755675, "learning_rate": 1.2851490154926816e-06, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.10924498736858368, "step": 705, "valid_targets_mean": 2328.4, "valid_targets_min": 284 }, { "epoch": 4.5248, "grad_norm": 0.34342301027294764, "learning_rate": 1.1328550258533211e-06, "loss": 0.3987, "loss_nan_ranks": 0, "loss_rank_avg": 0.07789184898138046, "step": 710, "valid_targets_mean": 1557.6, "valid_targets_min": 296 }, { "epoch": 4.5568, "grad_norm": 0.35468712537439756, "learning_rate": 9.899004286103953e-07, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.13266167044639587, "step": 715, "valid_targets_mean": 2674.4, "valid_targets_min": 272 }, { "epoch": 4.5888, "grad_norm": 0.3571894649558728, "learning_rate": 8.5635598746876e-07, "loss": 0.391, "loss_nan_ranks": 0, "loss_rank_avg": 0.09895232319831848, "step": 720, "valid_targets_mean": 1886.8, "valid_targets_min": 262 }, { "epoch": 4.6208, "grad_norm": 0.35819127523592137, "learning_rate": 7.32287808028389e-07, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.09150193631649017, "step": 725, "valid_targets_mean": 1706.1, "valid_targets_min": 280 }, { "epoch": 4.6528, "grad_norm": 0.3534633773953295, "learning_rate": 6.177573050615327e-07, "loss": 0.3872, "loss_nan_ranks": 0, "loss_rank_avg": 0.11433575302362442, "step": 730, "valid_targets_mean": 2423.9, "valid_targets_min": 276 }, { "epoch": 4.6848, "grad_norm": 0.34816971611805353, "learning_rate": 5.128211721119213e-07, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.10196089744567871, "step": 735, "valid_targets_mean": 2286.8, "valid_targets_min": 340 }, { "epoch": 4.7168, "grad_norm": 0.35701025072552006, "learning_rate": 4.175313534309755e-07, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.11588802933692932, "step": 740, "valid_targets_mean": 2558.3, "valid_targets_min": 814 }, { "epoch": 4.7488, "grad_norm": 0.3506649407424135, "learning_rate": 3.319350182649861e-07, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.10128729045391083, "step": 745, "valid_targets_mean": 2235.0, "valid_targets_min": 405 }, { "epoch": 4.7808, "grad_norm": 0.35767694845862597, "learning_rate": 2.560745375059392e-07, "loss": 0.3899, "loss_nan_ranks": 0, "loss_rank_avg": 0.09733337163925171, "step": 750, "valid_targets_mean": 1928.8, "valid_targets_min": 311 }, { "epoch": 4.8128, "grad_norm": 0.3512648513783525, "learning_rate": 1.8998746271758016e-07, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.1237851232290268, "step": 755, "valid_targets_mean": 2480.7, "valid_targets_min": 274 }, { "epoch": 4.8448, "grad_norm": 0.3696654818413258, "learning_rate": 1.337065075470778e-07, "loss": 0.3909, "loss_nan_ranks": 0, "loss_rank_avg": 0.09096920490264893, "step": 760, "valid_targets_mean": 1573.8, "valid_targets_min": 350 }, { "epoch": 4.8768, "grad_norm": 0.35499667325333933, "learning_rate": 8.725953153150279e-08, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.1062077134847641, "step": 765, "valid_targets_mean": 2201.6, "valid_targets_min": 742 }, { "epoch": 4.9088, "grad_norm": 0.3527505973814082, "learning_rate": 5.066952630711886e-08, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.1002032607793808, "step": 770, "valid_targets_mean": 2047.4, "valid_targets_min": 293 }, { "epoch": 4.9408, "grad_norm": 0.3765275127049218, "learning_rate": 2.3954604228342283e-08, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.08276014029979706, "step": 775, "valid_targets_mean": 1657.9, "valid_targets_min": 336 }, { "epoch": 4.9728, "grad_norm": 0.39662809214704003, "learning_rate": 7.12798940197601e-09, "loss": 0.4008, "loss_nan_ranks": 0, "loss_rank_avg": 0.0951353907585144, "step": 780, "valid_targets_mean": 1727.6, "valid_targets_min": 281 }, { "epoch": 5.0, "grad_norm": 0.7908188895892961, "learning_rate": 1.9801114115480802e-10, "loss": 0.404, "loss_nan_ranks": 0, "loss_rank_avg": 0.42353275418281555, "step": 785, "valid_targets_mean": 1574.1, "valid_targets_min": 271 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.42353275418281555, "step": 785, "total_flos": 7.570651310488289e+17, "train_loss": 0.4416168404232924, "train_runtime": 12176.8728, "train_samples_per_second": 4.105, "train_steps_per_second": 0.064, "valid_targets_mean": 1574.1, "valid_targets_min": 271 } ], "logging_steps": 5, "max_steps": 785, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.570651310488289e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }