{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1563, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0064, "grad_norm": 2.602491050405758, "learning_rate": 5.732484076433121e-07, "loss": 0.9793, "step": 10 }, { "epoch": 0.0128, "grad_norm": 1.3815022841989586, "learning_rate": 1.210191082802548e-06, "loss": 0.9695, "step": 20 }, { "epoch": 0.0192, "grad_norm": 0.9740189266404157, "learning_rate": 1.8471337579617835e-06, "loss": 0.8877, "step": 30 }, { "epoch": 0.0256, "grad_norm": 0.7784867540703468, "learning_rate": 2.4840764331210194e-06, "loss": 0.8351, "step": 40 }, { "epoch": 0.032, "grad_norm": 0.6424443086643383, "learning_rate": 3.121019108280255e-06, "loss": 0.7893, "step": 50 }, { "epoch": 0.0384, "grad_norm": 0.5976535344904226, "learning_rate": 3.757961783439491e-06, "loss": 0.7523, "step": 60 }, { "epoch": 0.0448, "grad_norm": 0.5941153103110722, "learning_rate": 4.394904458598727e-06, "loss": 0.7497, "step": 70 }, { "epoch": 0.0512, "grad_norm": 0.5677970641900663, "learning_rate": 5.031847133757962e-06, "loss": 0.719, "step": 80 }, { "epoch": 0.0576, "grad_norm": 0.5484222959455938, "learning_rate": 5.668789808917198e-06, "loss": 0.706, "step": 90 }, { "epoch": 0.064, "grad_norm": 0.5821437800951814, "learning_rate": 6.305732484076433e-06, "loss": 0.6975, "step": 100 }, { "epoch": 0.0704, "grad_norm": 0.5983878764334871, "learning_rate": 6.942675159235669e-06, "loss": 0.6955, "step": 110 }, { "epoch": 0.0768, "grad_norm": 0.6249048762628275, "learning_rate": 7.579617834394906e-06, "loss": 0.678, "step": 120 }, { "epoch": 0.0832, "grad_norm": 0.5913030399946232, "learning_rate": 8.21656050955414e-06, "loss": 0.6648, "step": 130 }, { "epoch": 0.0896, "grad_norm": 0.5975648810721754, "learning_rate": 8.853503184713377e-06, "loss": 0.6989, "step": 140 }, { "epoch": 0.096, "grad_norm": 0.6327980077077386, "learning_rate": 9.490445859872613e-06, "loss": 0.6915, "step": 150 }, { "epoch": 0.1024, "grad_norm": 0.6133561029771822, "learning_rate": 9.999950073815046e-06, "loss": 0.663, "step": 160 }, { "epoch": 0.1088, "grad_norm": 0.623322525134705, "learning_rate": 9.998202762029626e-06, "loss": 0.6715, "step": 170 }, { "epoch": 0.1152, "grad_norm": 0.6030051679185537, "learning_rate": 9.99396013795657e-06, "loss": 0.6628, "step": 180 }, { "epoch": 0.1216, "grad_norm": 0.5948663264793166, "learning_rate": 9.987224319691624e-06, "loss": 0.651, "step": 190 }, { "epoch": 0.128, "grad_norm": 0.619298355880156, "learning_rate": 9.977998670037554e-06, "loss": 0.664, "step": 200 }, { "epoch": 0.1344, "grad_norm": 0.6306816667543312, "learning_rate": 9.966287794825305e-06, "loss": 0.6593, "step": 210 }, { "epoch": 0.1408, "grad_norm": 0.6399492768626559, "learning_rate": 9.952097540614571e-06, "loss": 0.6603, "step": 220 }, { "epoch": 0.1472, "grad_norm": 0.686147202990426, "learning_rate": 9.935434991774951e-06, "loss": 0.6382, "step": 230 }, { "epoch": 0.1536, "grad_norm": 0.6168760924683282, "learning_rate": 9.916308466949134e-06, "loss": 0.6459, "step": 240 }, { "epoch": 0.16, "grad_norm": 0.6207669427874934, "learning_rate": 9.894727514899883e-06, "loss": 0.6623, "step": 250 }, { "epoch": 0.1664, "grad_norm": 0.5853238171255293, "learning_rate": 9.870702909742893e-06, "loss": 0.6329, "step": 260 }, { "epoch": 0.1728, "grad_norm": 0.6191120187612653, "learning_rate": 9.844246645567903e-06, "loss": 0.6315, "step": 270 }, { "epoch": 0.1792, "grad_norm": 0.6028415458034874, "learning_rate": 9.815371930450737e-06, "loss": 0.6376, "step": 280 }, { "epoch": 0.1856, "grad_norm": 0.6352858982108863, "learning_rate": 9.78409317985929e-06, "loss": 0.6367, "step": 290 }, { "epoch": 0.192, "grad_norm": 0.5929591423019905, "learning_rate": 9.750426009456713e-06, "loss": 0.6429, "step": 300 }, { "epoch": 0.1984, "grad_norm": 0.573820816300432, "learning_rate": 9.714387227305422e-06, "loss": 0.646, "step": 310 }, { "epoch": 0.2048, "grad_norm": 0.6112652527248457, "learning_rate": 9.67599482547581e-06, "loss": 0.6536, "step": 320 }, { "epoch": 0.2112, "grad_norm": 0.5733625525682681, "learning_rate": 9.635267971063848e-06, "loss": 0.6259, "step": 330 }, { "epoch": 0.2176, "grad_norm": 0.5877077600359397, "learning_rate": 9.59222699662208e-06, "loss": 0.6408, "step": 340 }, { "epoch": 0.224, "grad_norm": 0.6227632791247796, "learning_rate": 9.546893390008737e-06, "loss": 0.6295, "step": 350 }, { "epoch": 0.2304, "grad_norm": 0.5869030258388684, "learning_rate": 9.499289783660126e-06, "loss": 0.6234, "step": 360 }, { "epoch": 0.2368, "grad_norm": 0.5639510217330366, "learning_rate": 9.449439943291541e-06, "loss": 0.6262, "step": 370 }, { "epoch": 0.2432, "grad_norm": 0.5774729951455163, "learning_rate": 9.397368756032445e-06, "loss": 0.6314, "step": 380 }, { "epoch": 0.2496, "grad_norm": 0.5907290438974562, "learning_rate": 9.343102218001763e-06, "loss": 0.6091, "step": 390 }, { "epoch": 0.256, "grad_norm": 0.6058953897686866, "learning_rate": 9.286667421329523e-06, "loss": 0.6168, "step": 400 }, { "epoch": 0.2624, "grad_norm": 0.5615945775592219, "learning_rate": 9.228092540631342e-06, "loss": 0.6236, "step": 410 }, { "epoch": 0.2688, "grad_norm": 0.6149356167277431, "learning_rate": 9.167406818942468e-06, "loss": 0.6128, "step": 420 }, { "epoch": 0.2752, "grad_norm": 0.59652556732115, "learning_rate": 9.104640553118436e-06, "loss": 0.6173, "step": 430 }, { "epoch": 0.2816, "grad_norm": 0.6090494133930333, "learning_rate": 9.039825078709606e-06, "loss": 0.6127, "step": 440 }, { "epoch": 0.288, "grad_norm": 0.5854813559657089, "learning_rate": 8.972992754317144e-06, "loss": 0.6222, "step": 450 }, { "epoch": 0.2944, "grad_norm": 0.5440284971719938, "learning_rate": 8.904176945438255e-06, "loss": 0.6168, "step": 460 }, { "epoch": 0.3008, "grad_norm": 0.5847118446075076, "learning_rate": 8.833412007808714e-06, "loss": 0.6155, "step": 470 }, { "epoch": 0.3072, "grad_norm": 0.5413681495836723, "learning_rate": 8.760733270251065e-06, "loss": 0.6279, "step": 480 }, { "epoch": 0.3136, "grad_norm": 0.5581663589007605, "learning_rate": 8.686177017036979e-06, "loss": 0.6083, "step": 490 }, { "epoch": 0.32, "grad_norm": 0.5623656354778915, "learning_rate": 8.609780469772623e-06, "loss": 0.6223, "step": 500 }, { "epoch": 0.3264, "grad_norm": 0.5406095446625827, "learning_rate": 8.531581768816085e-06, "loss": 0.6101, "step": 510 }, { "epoch": 0.3328, "grad_norm": 0.5563849090211112, "learning_rate": 8.451619954236093e-06, "loss": 0.6148, "step": 520 }, { "epoch": 0.3392, "grad_norm": 0.5446894549752176, "learning_rate": 8.369934946321594e-06, "loss": 0.6195, "step": 530 }, { "epoch": 0.3456, "grad_norm": 0.5661587117381649, "learning_rate": 8.286567525651865e-06, "loss": 0.6187, "step": 540 }, { "epoch": 0.352, "grad_norm": 0.562636023418118, "learning_rate": 8.201559312737131e-06, "loss": 0.6031, "step": 550 }, { "epoch": 0.3584, "grad_norm": 0.5616956304791916, "learning_rate": 8.114952747239876e-06, "loss": 0.6129, "step": 560 }, { "epoch": 0.3648, "grad_norm": 0.5435561658248812, "learning_rate": 8.026791066787177e-06, "loss": 0.5952, "step": 570 }, { "epoch": 0.3712, "grad_norm": 0.5361005508992319, "learning_rate": 7.937118285384666e-06, "loss": 0.6068, "step": 580 }, { "epoch": 0.3776, "grad_norm": 0.5226490469878938, "learning_rate": 7.8459791714429e-06, "loss": 0.6048, "step": 590 }, { "epoch": 0.384, "grad_norm": 0.5933400769760249, "learning_rate": 7.753419225427097e-06, "loss": 0.6095, "step": 600 }, { "epoch": 0.3904, "grad_norm": 0.5786114502500966, "learning_rate": 7.659484657141382e-06, "loss": 0.5934, "step": 610 }, { "epoch": 0.3968, "grad_norm": 0.5606266353064333, "learning_rate": 7.564222362658935e-06, "loss": 0.6168, "step": 620 }, { "epoch": 0.4032, "grad_norm": 0.5186129092695043, "learning_rate": 7.467679900909489e-06, "loss": 0.5898, "step": 630 }, { "epoch": 0.4096, "grad_norm": 0.5100514654686459, "learning_rate": 7.369905469935935e-06, "loss": 0.6062, "step": 640 }, { "epoch": 0.416, "grad_norm": 0.5159716940208071, "learning_rate": 7.270947882831823e-06, "loss": 0.6072, "step": 650 }, { "epoch": 0.4224, "grad_norm": 0.5530420948296727, "learning_rate": 7.1708565433718354e-06, "loss": 0.6109, "step": 660 }, { "epoch": 0.4288, "grad_norm": 0.5452725277736225, "learning_rate": 7.06968142134734e-06, "loss": 0.6012, "step": 670 }, { "epoch": 0.4352, "grad_norm": 0.5822094391610059, "learning_rate": 6.967473027619381e-06, "loss": 0.6096, "step": 680 }, { "epoch": 0.4416, "grad_norm": 0.569478483592532, "learning_rate": 6.864282388901544e-06, "loss": 0.6216, "step": 690 }, { "epoch": 0.448, "grad_norm": 0.5126675290307857, "learning_rate": 6.760161022285274e-06, "loss": 0.6061, "step": 700 }, { "epoch": 0.4544, "grad_norm": 0.5243827379833276, "learning_rate": 6.655160909520391e-06, "loss": 0.6218, "step": 710 }, { "epoch": 0.4608, "grad_norm": 0.5592023721811629, "learning_rate": 6.54933447106362e-06, "loss": 0.6032, "step": 720 }, { "epoch": 0.4672, "grad_norm": 0.5552926937288394, "learning_rate": 6.4427345399081e-06, "loss": 0.6028, "step": 730 }, { "epoch": 0.4736, "grad_norm": 0.5374219261496646, "learning_rate": 6.3354143352069415e-06, "loss": 0.5962, "step": 740 }, { "epoch": 0.48, "grad_norm": 0.543839242828281, "learning_rate": 6.227427435703997e-06, "loss": 0.6017, "step": 750 }, { "epoch": 0.4864, "grad_norm": 0.5161807608943537, "learning_rate": 6.1188277529851015e-06, "loss": 0.5996, "step": 760 }, { "epoch": 0.4928, "grad_norm": 0.5246213393205181, "learning_rate": 6.009669504563154e-06, "loss": 0.5912, "step": 770 }, { "epoch": 0.4992, "grad_norm": 0.5305380697629275, "learning_rate": 5.900007186810461e-06, "loss": 0.589, "step": 780 }, { "epoch": 0.5056, "grad_norm": 0.5636277212013151, "learning_rate": 5.789895547751867e-06, "loss": 0.6091, "step": 790 }, { "epoch": 0.512, "grad_norm": 0.4830388273531845, "learning_rate": 5.679389559732234e-06, "loss": 0.5968, "step": 800 }, { "epoch": 0.5184, "grad_norm": 0.569099222020369, "learning_rate": 5.568544391971964e-06, "loss": 0.5964, "step": 810 }, { "epoch": 0.5248, "grad_norm": 0.569030815756338, "learning_rate": 5.4574153830241905e-06, "loss": 0.6098, "step": 820 }, { "epoch": 0.5312, "grad_norm": 0.5277929827696425, "learning_rate": 5.34605801314747e-06, "loss": 0.604, "step": 830 }, { "epoch": 0.5376, "grad_norm": 0.5964226122215665, "learning_rate": 5.234527876607698e-06, "loss": 0.6061, "step": 840 }, { "epoch": 0.544, "grad_norm": 0.5518786923172931, "learning_rate": 5.122880653923134e-06, "loss": 0.5896, "step": 850 }, { "epoch": 0.5504, "grad_norm": 0.5441595997189401, "learning_rate": 5.011172084066349e-06, "loss": 0.6049, "step": 860 }, { "epoch": 0.5568, "grad_norm": 0.5592388845110271, "learning_rate": 4.899457936636988e-06, "loss": 0.6157, "step": 870 }, { "epoch": 0.5632, "grad_norm": 0.5162039340658358, "learning_rate": 4.78779398401926e-06, "loss": 0.5967, "step": 880 }, { "epoch": 0.5696, "grad_norm": 0.530466150814013, "learning_rate": 4.6762359735380135e-06, "loss": 0.5843, "step": 890 }, { "epoch": 0.576, "grad_norm": 0.5406080152114066, "learning_rate": 4.564839599627347e-06, "loss": 0.5756, "step": 900 }, { "epoch": 0.5824, "grad_norm": 0.5426594471923879, "learning_rate": 4.453660476025612e-06, "loss": 0.5973, "step": 910 }, { "epoch": 0.5888, "grad_norm": 0.5129920995886479, "learning_rate": 4.342754108010695e-06, "loss": 0.6036, "step": 920 }, { "epoch": 0.5952, "grad_norm": 0.5579303493743256, "learning_rate": 4.232175864689464e-06, "loss": 0.5948, "step": 930 }, { "epoch": 0.6016, "grad_norm": 0.5105953152534003, "learning_rate": 4.12198095135519e-06, "loss": 0.5912, "step": 940 }, { "epoch": 0.608, "grad_norm": 0.4963667660116403, "learning_rate": 4.01222438192675e-06, "loss": 0.5772, "step": 950 }, { "epoch": 0.6144, "grad_norm": 0.5739324343415901, "learning_rate": 3.902960951483375e-06, "loss": 0.5981, "step": 960 }, { "epoch": 0.6208, "grad_norm": 0.5244786705923417, "learning_rate": 3.794245208908639e-06, "loss": 0.5796, "step": 970 }, { "epoch": 0.6272, "grad_norm": 0.502830786168131, "learning_rate": 3.686131429657387e-06, "loss": 0.6052, "step": 980 }, { "epoch": 0.6336, "grad_norm": 0.5544732167298034, "learning_rate": 3.578673588659145e-06, "loss": 0.5818, "step": 990 }, { "epoch": 0.64, "grad_norm": 0.5787060987870878, "learning_rate": 3.471925333371572e-06, "loss": 0.6036, "step": 1000 }, { "epoch": 0.6464, "grad_norm": 0.5452788201166408, "learning_rate": 3.365939956997399e-06, "loss": 0.587, "step": 1010 }, { "epoch": 0.6528, "grad_norm": 0.5007730511062461, "learning_rate": 3.260770371878236e-06, "loss": 0.592, "step": 1020 }, { "epoch": 0.6592, "grad_norm": 0.5024089841179589, "learning_rate": 3.1564690830785106e-06, "loss": 0.5792, "step": 1030 }, { "epoch": 0.6656, "grad_norm": 0.519234532539304, "learning_rate": 3.053088162172734e-06, "loss": 0.5877, "step": 1040 }, { "epoch": 0.672, "grad_norm": 0.5563138859742994, "learning_rate": 2.9506792212491987e-06, "loss": 0.5745, "step": 1050 }, { "epoch": 0.6784, "grad_norm": 0.5238260182460646, "learning_rate": 2.84929338714305e-06, "loss": 0.5818, "step": 1060 }, { "epoch": 0.6848, "grad_norm": 0.5423111609793606, "learning_rate": 2.748981275911633e-06, "loss": 0.5948, "step": 1070 }, { "epoch": 0.6912, "grad_norm": 0.49707620067772024, "learning_rate": 2.6497929675648435e-06, "loss": 0.576, "step": 1080 }, { "epoch": 0.6976, "grad_norm": 0.49501432541110524, "learning_rate": 2.5517779810630725e-06, "loss": 0.5891, "step": 1090 }, { "epoch": 0.704, "grad_norm": 0.5767661257387988, "learning_rate": 2.4549852495952727e-06, "loss": 0.6031, "step": 1100 }, { "epoch": 0.7104, "grad_norm": 0.48384838215803555, "learning_rate": 2.3594630961494615e-06, "loss": 0.5821, "step": 1110 }, { "epoch": 0.7168, "grad_norm": 0.5980972963180492, "learning_rate": 2.265259209387867e-06, "loss": 0.5853, "step": 1120 }, { "epoch": 0.7232, "grad_norm": 0.5070117639493683, "learning_rate": 2.172420619838729e-06, "loss": 0.5773, "step": 1130 }, { "epoch": 0.7296, "grad_norm": 0.5137000490908732, "learning_rate": 2.0809936764167106e-06, "loss": 0.5814, "step": 1140 }, { "epoch": 0.736, "grad_norm": 0.49347172498049263, "learning_rate": 1.991024023283562e-06, "loss": 0.578, "step": 1150 }, { "epoch": 0.7424, "grad_norm": 0.5413380980607065, "learning_rate": 1.902556577060652e-06, "loss": 0.5827, "step": 1160 }, { "epoch": 0.7488, "grad_norm": 0.5233744544590831, "learning_rate": 1.8156355044047008e-06, "loss": 0.5919, "step": 1170 }, { "epoch": 0.7552, "grad_norm": 0.49453370604435004, "learning_rate": 1.7303041999579395e-06, "loss": 0.5763, "step": 1180 }, { "epoch": 0.7616, "grad_norm": 0.5213363130572087, "learning_rate": 1.6466052646836834e-06, "loss": 0.5833, "step": 1190 }, { "epoch": 0.768, "grad_norm": 0.48742401610936353, "learning_rate": 1.5645804845981443e-06, "loss": 0.5845, "step": 1200 }, { "epoch": 0.7744, "grad_norm": 0.48713701490781586, "learning_rate": 1.4842708099091046e-06, "loss": 0.5935, "step": 1210 }, { "epoch": 0.7808, "grad_norm": 0.5322629942627162, "learning_rate": 1.4057163345718532e-06, "loss": 0.5824, "step": 1220 }, { "epoch": 0.7872, "grad_norm": 0.5738998194377111, "learning_rate": 1.328956276272606e-06, "loss": 0.5842, "step": 1230 }, { "epoch": 0.7936, "grad_norm": 0.5177751611826429, "learning_rate": 1.2540289568493862e-06, "loss": 0.5809, "step": 1240 }, { "epoch": 0.8, "grad_norm": 0.49850173298593586, "learning_rate": 1.1809717831601697e-06, "loss": 0.5941, "step": 1250 }, { "epoch": 0.8064, "grad_norm": 0.5259683455570581, "learning_rate": 1.1098212284078037e-06, "loss": 0.5898, "step": 1260 }, { "epoch": 0.8128, "grad_norm": 0.5337167936665104, "learning_rate": 1.0406128139310534e-06, "loss": 0.5838, "step": 1270 }, { "epoch": 0.8192, "grad_norm": 0.5329209057328337, "learning_rate": 9.733810914708692e-07, "loss": 0.5652, "step": 1280 }, { "epoch": 0.8256, "grad_norm": 0.4947616196065444, "learning_rate": 9.08159625920711e-07, "loss": 0.5782, "step": 1290 }, { "epoch": 0.832, "grad_norm": 0.48701085842063363, "learning_rate": 8.449809785695318e-07, "loss": 0.5734, "step": 1300 }, { "epoch": 0.8384, "grad_norm": 0.5279855881653323, "learning_rate": 7.838766908458339e-07, "loss": 0.573, "step": 1310 }, { "epoch": 0.8448, "grad_norm": 0.5170615957851604, "learning_rate": 7.248772685708589e-07, "loss": 0.5657, "step": 1320 }, { "epoch": 0.8512, "grad_norm": 0.5480040759674293, "learning_rate": 6.680121667288026e-07, "loss": 0.5789, "step": 1330 }, { "epoch": 0.8576, "grad_norm": 0.5059192330369989, "learning_rate": 6.133097747616546e-07, "loss": 0.591, "step": 1340 }, { "epoch": 0.864, "grad_norm": 0.494607411693714, "learning_rate": 5.607974023959977e-07, "loss": 0.5872, "step": 1350 }, { "epoch": 0.8704, "grad_norm": 0.49607139689588314, "learning_rate": 5.105012660088493e-07, "loss": 0.583, "step": 1360 }, { "epoch": 0.8768, "grad_norm": 0.4952262177253567, "learning_rate": 4.6244647553934594e-07, "loss": 0.5658, "step": 1370 }, { "epoch": 0.8832, "grad_norm": 0.5106918911939661, "learning_rate": 4.1665702195280986e-07, "loss": 0.5958, "step": 1380 }, { "epoch": 0.8896, "grad_norm": 0.5135139977922036, "learning_rate": 3.7315576526345433e-07, "loss": 0.5776, "step": 1390 }, { "epoch": 0.896, "grad_norm": 0.49984792731606037, "learning_rate": 3.3196442312170563e-07, "loss": 0.5886, "step": 1400 }, { "epoch": 0.9024, "grad_norm": 0.510253643299972, "learning_rate": 2.931035599718396e-07, "loss": 0.5757, "step": 1410 }, { "epoch": 0.9088, "grad_norm": 0.5031746296549354, "learning_rate": 2.5659257678535664e-07, "loss": 0.5973, "step": 1420 }, { "epoch": 0.9152, "grad_norm": 0.5001735346325461, "learning_rate": 2.2244970137519585e-07, "loss": 0.5714, "step": 1430 }, { "epoch": 0.9216, "grad_norm": 0.5240436483707327, "learning_rate": 1.9069197929564854e-07, "loss": 0.5651, "step": 1440 }, { "epoch": 0.928, "grad_norm": 0.5137266119174135, "learning_rate": 1.6133526533250566e-07, "loss": 0.575, "step": 1450 }, { "epoch": 0.9344, "grad_norm": 0.5140259395272652, "learning_rate": 1.3439421558768484e-07, "loss": 0.5824, "step": 1460 }, { "epoch": 0.9408, "grad_norm": 0.5069635291328707, "learning_rate": 1.0988228016228508e-07, "loss": 0.6007, "step": 1470 }, { "epoch": 0.9472, "grad_norm": 0.530901574682574, "learning_rate": 8.781169644173748e-08, "loss": 0.5847, "step": 1480 }, { "epoch": 0.9536, "grad_norm": 0.4927206207800097, "learning_rate": 6.819348298638839e-08, "loss": 0.5886, "step": 1490 }, { "epoch": 0.96, "grad_norm": 0.5101382262659164, "learning_rate": 5.103743403057027e-08, "loss": 0.5753, "step": 1500 }, { "epoch": 0.9664, "grad_norm": 0.5080493686664402, "learning_rate": 3.635211459291188e-08, "loss": 0.5813, "step": 1510 }, { "epoch": 0.9728, "grad_norm": 0.5114302550607454, "learning_rate": 2.4144856200321587e-08, "loss": 0.5782, "step": 1520 }, { "epoch": 0.9792, "grad_norm": 0.5039757606288552, "learning_rate": 1.4421753227780721e-08, "loss": 0.5664, "step": 1530 }, { "epoch": 0.9856, "grad_norm": 0.47761603416777054, "learning_rate": 7.187659855776852e-09, "loss": 0.6098, "step": 1540 }, { "epoch": 0.992, "grad_norm": 0.46817616259336664, "learning_rate": 2.4461876468934164e-09, "loss": 0.5754, "step": 1550 }, { "epoch": 0.9984, "grad_norm": 0.48655513937513295, "learning_rate": 1.997037427675652e-10, "loss": 0.5788, "step": 1560 } ], "logging_steps": 10, "max_steps": 1563, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 398611788857344.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }