{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.0,
  "global_step": 75124,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5e-09,
      "loss": 10.4893,
      "step": 1
    },
    {
      "epoch": 0.09,
      "learning_rate": 2.5e-06,
      "loss": 9.3442,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 5e-06,
      "loss": 7.4232,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.5e-06,
      "loss": 6.0986,
      "step": 1500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1e-05,
      "loss": 5.8257,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.25e-05,
      "loss": 5.7081,
      "step": 2500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.5e-05,
      "loss": 5.6336,
      "step": 3000
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.75e-05,
      "loss": 5.5724,
      "step": 3500
    },
    {
      "epoch": 0.75,
      "learning_rate": 2e-05,
      "loss": 5.529,
      "step": 4000
    },
    {
      "epoch": 0.84,
      "learning_rate": 2.25e-05,
      "loss": 5.4913,
      "step": 4500
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.5e-05,
      "loss": 5.4578,
      "step": 5000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 5.4299,
      "step": 5500
    },
    {
      "epoch": 1.12,
      "learning_rate": 3e-05,
      "loss": 5.4036,
      "step": 6000
    },
    {
      "epoch": 1.21,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 5.3821,
      "step": 6500
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.5e-05,
      "loss": 5.3594,
      "step": 7000
    },
    {
      "epoch": 1.4,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 5.3419,
      "step": 7500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4e-05,
      "loss": 5.321,
      "step": 8000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.2495e-05,
      "loss": 5.3034,
      "step": 8500
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.4995000000000005e-05,
      "loss": 5.2938,
      "step": 9000
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.7495e-05,
      "loss": 5.2774,
      "step": 9500
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.9995000000000005e-05,
      "loss": 5.2669,
      "step": 10000
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.99883448792361e-05,
      "loss": 5.2542,
      "step": 10500
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.997664295075829e-05,
      "loss": 5.2418,
      "step": 11000
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.9964941022280475e-05,
      "loss": 5.231,
      "step": 11500
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.995323909380266e-05,
      "loss": 5.2201,
      "step": 12000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.994156056918181e-05,
      "loss": 5.2114,
      "step": 12500
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.992985864070399e-05,
      "loss": 5.2043,
      "step": 13000
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.9918156712226175e-05,
      "loss": 5.194,
      "step": 13500
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.9906454783748366e-05,
      "loss": 5.1832,
      "step": 14000
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.989475285527055e-05,
      "loss": 5.1801,
      "step": 14500
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.988307433064969e-05,
      "loss": 5.1721,
      "step": 15000
    },
    {
      "epoch": 2.89,
      "learning_rate": 4.987137240217188e-05,
      "loss": 5.1657,
      "step": 15500
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.9859670473694066e-05,
      "loss": 5.16,
      "step": 16000
    },
    {
      "epoch": 3.07,
      "learning_rate": 4.984796854521626e-05,
      "loss": 5.1548,
      "step": 16500
    },
    {
      "epoch": 3.17,
      "learning_rate": 4.983629002059539e-05,
      "loss": 5.1467,
      "step": 17000
    },
    {
      "epoch": 3.26,
      "learning_rate": 4.982458809211759e-05,
      "loss": 5.1421,
      "step": 17500
    },
    {
      "epoch": 3.35,
      "learning_rate": 4.981288616363977e-05,
      "loss": 5.1356,
      "step": 18000
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.980118423516196e-05,
      "loss": 5.1325,
      "step": 18500
    },
    {
      "epoch": 3.54,
      "learning_rate": 4.97895057105411e-05,
      "loss": 5.1272,
      "step": 19000
    },
    {
      "epoch": 3.63,
      "learning_rate": 4.977780378206329e-05,
      "loss": 5.1207,
      "step": 19500
    },
    {
      "epoch": 3.73,
      "learning_rate": 4.976610185358547e-05,
      "loss": 5.1182,
      "step": 20000
    },
    {
      "epoch": 3.82,
      "learning_rate": 4.975439992510766e-05,
      "loss": 5.1137,
      "step": 20500
    },
    {
      "epoch": 3.91,
      "learning_rate": 4.974269799662985e-05,
      "loss": 5.1099,
      "step": 21000
    },
    {
      "epoch": 4.01,
      "learning_rate": 4.973101947200899e-05,
      "loss": 5.1054,
      "step": 21500
    },
    {
      "epoch": 4.1,
      "learning_rate": 4.971931754353117e-05,
      "loss": 5.0999,
      "step": 22000
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.9707615615053363e-05,
      "loss": 5.0948,
      "step": 22500
    },
    {
      "epoch": 4.29,
      "learning_rate": 4.9695913686575554e-05,
      "loss": 5.0925,
      "step": 23000
    },
    {
      "epoch": 4.38,
      "learning_rate": 4.968423516195469e-05,
      "loss": 5.0874,
      "step": 23500
    },
    {
      "epoch": 4.47,
      "learning_rate": 4.967253323347688e-05,
      "loss": 5.0848,
      "step": 24000
    },
    {
      "epoch": 4.57,
      "learning_rate": 4.966083130499907e-05,
      "loss": 5.0815,
      "step": 24500
    },
    {
      "epoch": 4.66,
      "learning_rate": 4.9649129376521254e-05,
      "loss": 5.0802,
      "step": 25000
    },
    {
      "epoch": 4.75,
      "learning_rate": 4.9637450851900395e-05,
      "loss": 5.0777,
      "step": 25500
    },
    {
      "epoch": 4.85,
      "learning_rate": 4.962574892342258e-05,
      "loss": 5.0732,
      "step": 26000
    },
    {
      "epoch": 4.94,
      "learning_rate": 4.961404699494477e-05,
      "loss": 5.0705,
      "step": 26500
    },
    {
      "epoch": 5.03,
      "learning_rate": 4.9602345066466954e-05,
      "loss": 5.0673,
      "step": 27000
    },
    {
      "epoch": 5.12,
      "learning_rate": 4.9590666541846095e-05,
      "loss": 5.0608,
      "step": 27500
    },
    {
      "epoch": 5.22,
      "learning_rate": 4.9578964613368286e-05,
      "loss": 5.0599,
      "step": 28000
    },
    {
      "epoch": 5.31,
      "learning_rate": 4.956726268489047e-05,
      "loss": 5.0567,
      "step": 28500
    },
    {
      "epoch": 5.4,
      "learning_rate": 4.955556075641266e-05,
      "loss": 5.0523,
      "step": 29000
    },
    {
      "epoch": 5.5,
      "learning_rate": 4.95438822317918e-05,
      "loss": 5.051,
      "step": 29500
    },
    {
      "epoch": 5.59,
      "learning_rate": 4.953218030331399e-05,
      "loss": 5.0469,
      "step": 30000
    },
    {
      "epoch": 5.68,
      "learning_rate": 4.952047837483618e-05,
      "loss": 5.0424,
      "step": 30500
    },
    {
      "epoch": 5.78,
      "learning_rate": 4.950877644635836e-05,
      "loss": 5.0405,
      "step": 31000
    },
    {
      "epoch": 5.87,
      "learning_rate": 4.949709792173751e-05,
      "loss": 5.0073,
      "step": 31500
    },
    {
      "epoch": 5.96,
      "learning_rate": 4.948539599325969e-05,
      "loss": 4.6646,
      "step": 32000
    },
    {
      "epoch": 6.06,
      "learning_rate": 4.9473694064781877e-05,
      "loss": 4.3483,
      "step": 32500
    },
    {
      "epoch": 6.15,
      "learning_rate": 4.946199213630406e-05,
      "loss": 4.0878,
      "step": 33000
    },
    {
      "epoch": 6.24,
      "learning_rate": 4.945031361168321e-05,
      "loss": 3.8246,
      "step": 33500
    },
    {
      "epoch": 6.34,
      "learning_rate": 4.943861168320539e-05,
      "loss": 3.221,
      "step": 34000
    },
    {
      "epoch": 6.43,
      "learning_rate": 4.9426909754727577e-05,
      "loss": 2.7026,
      "step": 34500
    },
    {
      "epoch": 6.52,
      "learning_rate": 4.9415207826249774e-05,
      "loss": 2.3592,
      "step": 35000
    },
    {
      "epoch": 6.62,
      "learning_rate": 4.940352930162891e-05,
      "loss": 1.9468,
      "step": 35500
    },
    {
      "epoch": 6.71,
      "learning_rate": 4.939182737315109e-05,
      "loss": 1.6962,
      "step": 36000
    },
    {
      "epoch": 6.8,
      "learning_rate": 4.938012544467328e-05,
      "loss": 1.5455,
      "step": 36500
    },
    {
      "epoch": 6.9,
      "learning_rate": 4.9368423516195474e-05,
      "loss": 1.4404,
      "step": 37000
    },
    {
      "epoch": 6.99,
      "learning_rate": 4.9356744991574615e-05,
      "loss": 1.3671,
      "step": 37500
    },
    {
      "epoch": 7.08,
      "learning_rate": 4.93450430630968e-05,
      "loss": 1.3047,
      "step": 38000
    },
    {
      "epoch": 7.17,
      "learning_rate": 4.933334113461899e-05,
      "loss": 1.242,
      "step": 38500
    },
    {
      "epoch": 7.27,
      "learning_rate": 4.9321639206141174e-05,
      "loss": 1.1857,
      "step": 39000
    },
    {
      "epoch": 7.36,
      "learning_rate": 4.930993727766336e-05,
      "loss": 1.1364,
      "step": 39500
    },
    {
      "epoch": 7.45,
      "learning_rate": 4.9298258753042506e-05,
      "loss": 1.0976,
      "step": 40000
    },
    {
      "epoch": 7.55,
      "learning_rate": 4.928655682456469e-05,
      "loss": 1.062,
      "step": 40500
    },
    {
      "epoch": 7.64,
      "learning_rate": 4.9274854896086874e-05,
      "loss": 1.0284,
      "step": 41000
    },
    {
      "epoch": 7.73,
      "learning_rate": 4.9263152967609065e-05,
      "loss": 1.0023,
      "step": 41500
    },
    {
      "epoch": 7.83,
      "learning_rate": 4.9251474442988206e-05,
      "loss": 0.98,
      "step": 42000
    },
    {
      "epoch": 7.92,
      "learning_rate": 4.9239772514510397e-05,
      "loss": 0.9582,
      "step": 42500
    },
    {
      "epoch": 8.01,
      "learning_rate": 4.922809398988954e-05,
      "loss": 0.9424,
      "step": 43000
    },
    {
      "epoch": 8.11,
      "learning_rate": 4.921639206141173e-05,
      "loss": 0.9232,
      "step": 43500
    },
    {
      "epoch": 8.2,
      "learning_rate": 4.920469013293391e-05,
      "loss": 0.908,
      "step": 44000
    },
    {
      "epoch": 8.29,
      "learning_rate": 4.9192988204456097e-05,
      "loss": 0.8941,
      "step": 44500
    },
    {
      "epoch": 8.39,
      "learning_rate": 4.918128627597828e-05,
      "loss": 0.8833,
      "step": 45000
    },
    {
      "epoch": 8.48,
      "learning_rate": 4.916958434750047e-05,
      "loss": 0.8697,
      "step": 45500
    },
    {
      "epoch": 8.57,
      "learning_rate": 4.9157882419022655e-05,
      "loss": 0.8558,
      "step": 46000
    },
    {
      "epoch": 8.67,
      "learning_rate": 4.9146203894401796e-05,
      "loss": 0.8465,
      "step": 46500
    },
    {
      "epoch": 8.76,
      "learning_rate": 4.913450196592399e-05,
      "loss": 0.8352,
      "step": 47000
    },
    {
      "epoch": 8.85,
      "learning_rate": 4.912280003744618e-05,
      "loss": 0.8253,
      "step": 47500
    },
    {
      "epoch": 8.95,
      "learning_rate": 4.911109810896836e-05,
      "loss": 0.8135,
      "step": 48000
    },
    {
      "epoch": 9.04,
      "learning_rate": 4.9099396180490546e-05,
      "loss": 0.8064,
      "step": 48500
    },
    {
      "epoch": 9.13,
      "learning_rate": 4.908769425201274e-05,
      "loss": 0.7971,
      "step": 49000
    },
    {
      "epoch": 9.22,
      "learning_rate": 4.907601572739188e-05,
      "loss": 0.7846,
      "step": 49500
    },
    {
      "epoch": 9.32,
      "learning_rate": 4.906431379891406e-05,
      "loss": 0.779,
      "step": 50000
    },
    {
      "epoch": 9.41,
      "learning_rate": 4.9052611870436246e-05,
      "loss": 0.7717,
      "step": 50500
    },
    {
      "epoch": 9.5,
      "learning_rate": 4.904090994195844e-05,
      "loss": 0.7618,
      "step": 51000
    },
    {
      "epoch": 9.6,
      "learning_rate": 4.902920801348063e-05,
      "loss": 0.7573,
      "step": 51500
    },
    {
      "epoch": 9.69,
      "learning_rate": 4.901752948885976e-05,
      "loss": 0.7505,
      "step": 52000
    },
    {
      "epoch": 9.78,
      "learning_rate": 4.900582756038195e-05,
      "loss": 0.7445,
      "step": 52500
    },
    {
      "epoch": 9.88,
      "learning_rate": 4.8994125631904144e-05,
      "loss": 0.7389,
      "step": 53000
    },
    {
      "epoch": 9.97,
      "learning_rate": 4.898242370342633e-05,
      "loss": 0.7314,
      "step": 53500
    },
    {
      "epoch": 10.06,
      "learning_rate": 4.897072177494851e-05,
      "loss": 0.7245,
      "step": 54000
    },
    {
      "epoch": 10.16,
      "learning_rate": 4.895904325032766e-05,
      "loss": 0.7193,
      "step": 54500
    },
    {
      "epoch": 10.25,
      "learning_rate": 4.8947341321849843e-05,
      "loss": 0.7137,
      "step": 55000
    },
    {
      "epoch": 10.34,
      "learning_rate": 4.893563939337203e-05,
      "loss": 0.7083,
      "step": 55500
    },
    {
      "epoch": 10.44,
      "learning_rate": 4.892393746489422e-05,
      "loss": 0.7039,
      "step": 56000
    },
    {
      "epoch": 10.53,
      "learning_rate": 4.891223553641641e-05,
      "loss": 0.6995,
      "step": 56500
    },
    {
      "epoch": 10.62,
      "learning_rate": 4.890053360793859e-05,
      "loss": 0.6941,
      "step": 57000
    },
    {
      "epoch": 10.72,
      "learning_rate": 4.888885508331773e-05,
      "loss": 0.6904,
      "step": 57500
    },
    {
      "epoch": 10.81,
      "learning_rate": 4.8877153154839925e-05,
      "loss": 0.6846,
      "step": 58000
    },
    {
      "epoch": 10.9,
      "learning_rate": 4.886545122636211e-05,
      "loss": 0.6806,
      "step": 58500
    },
    {
      "epoch": 11.0,
      "learning_rate": 4.885374929788429e-05,
      "loss": 0.6771,
      "step": 59000
    },
    {
      "epoch": 11.09,
      "learning_rate": 4.884204736940648e-05,
      "loss": 0.6726,
      "step": 59500
    },
    {
      "epoch": 11.18,
      "learning_rate": 4.8830368844785625e-05,
      "loss": 0.6679,
      "step": 60000
    },
    {
      "epoch": 11.27,
      "learning_rate": 4.881866691630781e-05,
      "loss": 0.6634,
      "step": 60500
    },
    {
      "epoch": 11.37,
      "learning_rate": 4.880696498782999e-05,
      "loss": 0.6607,
      "step": 61000
    },
    {
      "epoch": 11.46,
      "learning_rate": 4.8795263059352184e-05,
      "loss": 0.6568,
      "step": 61500
    },
    {
      "epoch": 11.55,
      "learning_rate": 4.8783561130874375e-05,
      "loss": 0.6546,
      "step": 62000
    },
    {
      "epoch": 11.65,
      "learning_rate": 4.877188260625351e-05,
      "loss": 0.6527,
      "step": 62500
    },
    {
      "epoch": 11.74,
      "learning_rate": 4.87601806777757e-05,
      "loss": 0.6455,
      "step": 63000
    },
    {
      "epoch": 11.83,
      "learning_rate": 4.874847874929789e-05,
      "loss": 0.6437,
      "step": 63500
    },
    {
      "epoch": 11.93,
      "learning_rate": 4.8736776820820074e-05,
      "loss": 0.6408,
      "step": 64000
    },
    {
      "epoch": 12.02,
      "learning_rate": 4.872507489234226e-05,
      "loss": 0.6369,
      "step": 64500
    },
    {
      "epoch": 12.11,
      "learning_rate": 4.871337296386444e-05,
      "loss": 0.6336,
      "step": 65000
    },
    {
      "epoch": 12.21,
      "learning_rate": 4.870167103538664e-05,
      "loss": 0.63,
      "step": 65500
    },
    {
      "epoch": 12.3,
      "learning_rate": 4.8689992510765774e-05,
      "loss": 0.6277,
      "step": 66000
    },
    {
      "epoch": 12.39,
      "learning_rate": 4.867829058228796e-05,
      "loss": 0.6265,
      "step": 66500
    },
    {
      "epoch": 12.49,
      "learning_rate": 4.866658865381015e-05,
      "loss": 0.6238,
      "step": 67000
    },
    {
      "epoch": 12.58,
      "learning_rate": 4.865488672533234e-05,
      "loss": 0.6192,
      "step": 67500
    },
    {
      "epoch": 12.67,
      "learning_rate": 4.864320820071148e-05,
      "loss": 0.6165,
      "step": 68000
    },
    {
      "epoch": 12.77,
      "learning_rate": 4.8631506272233665e-05,
      "loss": 0.613,
      "step": 68500
    },
    {
      "epoch": 12.86,
      "learning_rate": 4.8619804343755856e-05,
      "loss": 0.6124,
      "step": 69000
    },
    {
      "epoch": 12.95,
      "learning_rate": 4.860810241527804e-05,
      "loss": 0.6111,
      "step": 69500
    },
    {
      "epoch": 13.05,
      "learning_rate": 4.8596400486800224e-05,
      "loss": 0.607,
      "step": 70000
    },
    {
      "epoch": 13.14,
      "learning_rate": 4.858469855832241e-05,
      "loss": 0.602,
      "step": 70500
    },
    {
      "epoch": 13.23,
      "learning_rate": 4.8573020033701556e-05,
      "loss": 0.602,
      "step": 71000
    },
    {
      "epoch": 13.32,
      "learning_rate": 4.856131810522374e-05,
      "loss": 0.6008,
      "step": 71500
    },
    {
      "epoch": 13.42,
      "learning_rate": 4.854961617674593e-05,
      "loss": 0.5974,
      "step": 72000
    },
    {
      "epoch": 13.51,
      "learning_rate": 4.853791424826812e-05,
      "loss": 0.5939,
      "step": 72500
    },
    {
      "epoch": 13.6,
      "learning_rate": 4.8526212319790305e-05,
      "loss": 0.5942,
      "step": 73000
    },
    {
      "epoch": 13.7,
      "learning_rate": 4.851451039131249e-05,
      "loss": 0.5902,
      "step": 73500
    },
    {
      "epoch": 13.79,
      "learning_rate": 4.850283186669163e-05,
      "loss": 0.5893,
      "step": 74000
    },
    {
      "epoch": 13.88,
      "learning_rate": 4.849112993821382e-05,
      "loss": 0.5889,
      "step": 74500
    },
    {
      "epoch": 13.98,
      "learning_rate": 4.8479428009736005e-05,
      "loss": 0.5864,
      "step": 75000
    }
  ],
  "max_steps": 2146400,
  "num_train_epochs": 400,
  "total_flos": 2.024730999544978e+19,
  "trial_name": null,
  "trial_params": null
}