{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9952556668423828,
  "eval_steps": 500,
  "global_step": 118,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008434370057986295,
      "grad_norm": 0.08799133449792862,
      "learning_rate": 4.9999999999999996e-06,
      "loss": 1.6351,
      "step": 1
    },
    {
      "epoch": 0.01686874011597259,
      "grad_norm": 0.08821269869804382,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.6405,
      "step": 2
    },
    {
      "epoch": 0.025303110173958882,
      "grad_norm": 0.028541648760437965,
      "learning_rate": 1.5e-05,
      "loss": 1.6264,
      "step": 3
    },
    {
      "epoch": 0.03373748023194518,
      "grad_norm": 0.016522206366062164,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 1.6233,
      "step": 4
    },
    {
      "epoch": 0.04217185028993147,
      "grad_norm": 0.054906539618968964,
      "learning_rate": 2.5e-05,
      "loss": 1.62,
      "step": 5
    },
    {
      "epoch": 0.050606220347917764,
      "grad_norm": 0.0514790378510952,
      "learning_rate": 3e-05,
      "loss": 1.6134,
      "step": 6
    },
    {
      "epoch": 0.05904059040590406,
      "grad_norm": 0.04156072437763214,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 1.6169,
      "step": 7
    },
    {
      "epoch": 0.06747496046389036,
      "grad_norm": 0.05689298361539841,
      "learning_rate": 3.9999999999999996e-05,
      "loss": 1.6143,
      "step": 8
    },
    {
      "epoch": 0.07590933052187665,
      "grad_norm": 0.041525840759277344,
      "learning_rate": 4.5e-05,
      "loss": 1.6104,
      "step": 9
    },
    {
      "epoch": 0.08434370057986294,
      "grad_norm": 0.031016899272799492,
      "learning_rate": 5e-05,
      "loss": 1.6028,
      "step": 10
    },
    {
      "epoch": 0.09277807063784924,
      "grad_norm": 0.03775344789028168,
      "learning_rate": 5.5e-05,
      "loss": 1.5949,
      "step": 11
    },
    {
      "epoch": 0.10121244069583553,
      "grad_norm": 0.027061201632022858,
      "learning_rate": 6e-05,
      "loss": 1.5966,
      "step": 12
    },
    {
      "epoch": 0.10964681075382182,
      "grad_norm": 0.03555454686284065,
      "learning_rate": 5.998682509526384e-05,
      "loss": 1.601,
      "step": 13
    },
    {
      "epoch": 0.11808118081180811,
      "grad_norm": 0.038648299872875214,
      "learning_rate": 5.994731195292965e-05,
      "loss": 1.6015,
      "step": 14
    },
    {
      "epoch": 0.1265155508697944,
      "grad_norm": 0.03883035108447075,
      "learning_rate": 5.988149527845651e-05,
      "loss": 1.5992,
      "step": 15
    },
    {
      "epoch": 0.13494992092778071,
      "grad_norm": 0.03391977399587631,
      "learning_rate": 5.978943288040551e-05,
      "loss": 1.5932,
      "step": 16
    },
    {
      "epoch": 0.143384290985767,
      "grad_norm": 0.0362255796790123,
      "learning_rate": 5.967120561966492e-05,
      "loss": 1.5873,
      "step": 17
    },
    {
      "epoch": 0.1518186610437533,
      "grad_norm": 0.027403229847550392,
      "learning_rate": 5.952691733842791e-05,
      "loss": 1.5845,
      "step": 18
    },
    {
      "epoch": 0.16025303110173958,
      "grad_norm": 0.02821512520313263,
      "learning_rate": 5.935669476898512e-05,
      "loss": 1.5942,
      "step": 19
    },
    {
      "epoch": 0.16868740115972589,
      "grad_norm": 0.022913869470357895,
      "learning_rate": 5.9160687422412324e-05,
      "loss": 1.5976,
      "step": 20
    },
    {
      "epoch": 0.17712177121771217,
      "grad_norm": 0.02420000359416008,
      "learning_rate": 5.893906745725076e-05,
      "loss": 1.5862,
      "step": 21
    },
    {
      "epoch": 0.18555614127569847,
      "grad_norm": 0.021311871707439423,
      "learning_rate": 5.8692029528295675e-05,
      "loss": 1.5877,
      "step": 22
    },
    {
      "epoch": 0.19399051133368478,
      "grad_norm": 0.024183662608265877,
      "learning_rate": 5.841979061562574e-05,
      "loss": 1.584,
      "step": 23
    },
    {
      "epoch": 0.20242488139167106,
      "grad_norm": 0.02072131633758545,
      "learning_rate": 5.8122589834023634e-05,
      "loss": 1.5841,
      "step": 24
    },
    {
      "epoch": 0.21085925144965736,
      "grad_norm": 0.023273587226867676,
      "learning_rate": 5.7800688222955e-05,
      "loss": 1.5845,
      "step": 25
    },
    {
      "epoch": 0.21929362150764364,
      "grad_norm": 0.0180776659399271,
      "learning_rate": 5.745436851729055e-05,
      "loss": 1.594,
      "step": 26
    },
    {
      "epoch": 0.22772799156562995,
      "grad_norm": 0.018995055928826332,
      "learning_rate": 5.708393489897231e-05,
      "loss": 1.5903,
      "step": 27
    },
    {
      "epoch": 0.23616236162361623,
      "grad_norm": 0.017286648973822594,
      "learning_rate": 5.668971272984242e-05,
      "loss": 1.5804,
      "step": 28
    },
    {
      "epoch": 0.24459673168160254,
      "grad_norm": 0.018625088036060333,
      "learning_rate": 5.6272048265869104e-05,
      "loss": 1.5798,
      "step": 29
    },
    {
      "epoch": 0.2530311017395888,
      "grad_norm": 0.017109202221035957,
      "learning_rate": 5.583130835302066e-05,
      "loss": 1.5848,
      "step": 30
    },
    {
      "epoch": 0.2614654717975751,
      "grad_norm": 0.017000902444124222,
      "learning_rate": 5.536788010505478e-05,
      "loss": 1.5751,
      "step": 31
    },
    {
      "epoch": 0.26989984185556143,
      "grad_norm": 0.018897738307714462,
      "learning_rate": 5.4882170563506055e-05,
      "loss": 1.5799,
      "step": 32
    },
    {
      "epoch": 0.2783342119135477,
      "grad_norm": 0.017153726890683174,
      "learning_rate": 5.437460634017044e-05,
      "loss": 1.5758,
      "step": 33
    },
    {
      "epoch": 0.286768581971534,
      "grad_norm": 0.020006069913506508,
      "learning_rate": 5.3845633242400604e-05,
      "loss": 1.5774,
      "step": 34
    },
    {
      "epoch": 0.2952029520295203,
      "grad_norm": 0.016250574961304665,
      "learning_rate": 5.329571588154127e-05,
      "loss": 1.5748,
      "step": 35
    },
    {
      "epoch": 0.3036373220875066,
      "grad_norm": 0.019675249233841896,
      "learning_rate": 5.2725337264848605e-05,
      "loss": 1.5772,
      "step": 36
    },
    {
      "epoch": 0.3120716921454929,
      "grad_norm": 0.017005721107125282,
      "learning_rate": 5.213499837125182e-05,
      "loss": 1.5697,
      "step": 37
    },
    {
      "epoch": 0.32050606220347916,
      "grad_norm": 0.01664470136165619,
      "learning_rate": 5.152521771132993e-05,
      "loss": 1.5761,
      "step": 38
    },
    {
      "epoch": 0.32894043226146547,
      "grad_norm": 0.01764543540775776,
      "learning_rate": 5.0896530871889914e-05,
      "loss": 1.5793,
      "step": 39
    },
    {
      "epoch": 0.33737480231945177,
      "grad_norm": 0.016753442585468292,
      "learning_rate": 5.024949004554632e-05,
      "loss": 1.5658,
      "step": 40
    },
    {
      "epoch": 0.3458091723774381,
      "grad_norm": 0.019939422607421875,
      "learning_rate": 4.958466354571565e-05,
      "loss": 1.5762,
      "step": 41
    },
    {
      "epoch": 0.35424354243542433,
      "grad_norm": 0.01566561497747898,
      "learning_rate": 4.890263530745134e-05,
      "loss": 1.5703,
      "step": 42
    },
    {
      "epoch": 0.36267791249341064,
      "grad_norm": 0.015579808503389359,
      "learning_rate": 4.8204004374557806e-05,
      "loss": 1.577,
      "step": 43
    },
    {
      "epoch": 0.37111228255139694,
      "grad_norm": 0.016742996871471405,
      "learning_rate": 4.748938437343416e-05,
      "loss": 1.5726,
      "step": 44
    },
    {
      "epoch": 0.37954665260938325,
      "grad_norm": 0.017128925770521164,
      "learning_rate": 4.675940297410958e-05,
      "loss": 1.579,
      "step": 45
    },
    {
      "epoch": 0.38798102266736956,
      "grad_norm": 0.015266829170286655,
      "learning_rate": 4.601470133894373e-05,
      "loss": 1.5611,
      "step": 46
    },
    {
      "epoch": 0.3964153927253558,
      "grad_norm": 0.014922689646482468,
      "learning_rate": 4.525593355947662e-05,
      "loss": 1.5725,
      "step": 47
    },
    {
      "epoch": 0.4048497627833421,
      "grad_norm": 0.01651890017092228,
      "learning_rate": 4.448376608192235e-05,
      "loss": 1.5679,
      "step": 48
    },
    {
      "epoch": 0.4132841328413284,
      "grad_norm": 0.013002808205783367,
      "learning_rate": 4.3698877121811395e-05,
      "loss": 1.5712,
      "step": 49
    },
    {
      "epoch": 0.42171850289931473,
      "grad_norm": 0.013684232719242573,
      "learning_rate": 4.290195606829562e-05,
      "loss": 1.5683,
      "step": 50
    },
    {
      "epoch": 0.430152872957301,
      "grad_norm": 0.01470887940376997,
      "learning_rate": 4.2093702878639174e-05,
      "loss": 1.5784,
      "step": 51
    },
    {
      "epoch": 0.4385872430152873,
      "grad_norm": 0.013774153776466846,
      "learning_rate": 4.127482746342714e-05,
      "loss": 1.5648,
      "step": 52
    },
    {
      "epoch": 0.4470216130732736,
      "grad_norm": 0.01601037010550499,
      "learning_rate": 4.044604906303197e-05,
      "loss": 1.5671,
      "step": 53
    },
    {
      "epoch": 0.4554559831312599,
      "grad_norm": 0.013479109853506088,
      "learning_rate": 3.960809561588513e-05,
      "loss": 1.5759,
      "step": 54
    },
    {
      "epoch": 0.46389035318924615,
      "grad_norm": 0.01525378692895174,
      "learning_rate": 3.876170311910928e-05,
      "loss": 1.5672,
      "step": 55
    },
    {
      "epoch": 0.47232472324723246,
      "grad_norm": 0.013126607052981853,
      "learning_rate": 3.790761498207203e-05,
      "loss": 1.5744,
      "step": 56
    },
    {
      "epoch": 0.48075909330521877,
      "grad_norm": 0.013218970037996769,
      "learning_rate": 3.704658137342952e-05,
      "loss": 1.5688,
      "step": 57
    },
    {
      "epoch": 0.48919346336320507,
      "grad_norm": 0.014142030850052834,
      "learning_rate": 3.617935856223295e-05,
      "loss": 1.5742,
      "step": 58
    },
    {
      "epoch": 0.4976278334211914,
      "grad_norm": 0.013189482502639294,
      "learning_rate": 3.5306708253677186e-05,
      "loss": 1.5615,
      "step": 59
    },
    {
      "epoch": 0.5060622034791776,
      "grad_norm": 0.014055909588932991,
      "learning_rate": 3.442939692007444e-05,
      "loss": 1.5456,
      "step": 60
    },
    {
      "epoch": 0.5144965735371639,
      "grad_norm": 0.011999402195215225,
      "learning_rate": 3.354819512764097e-05,
      "loss": 1.5579,
      "step": 61
    },
    {
      "epoch": 0.5229309435951502,
      "grad_norm": 0.015170286409556866,
      "learning_rate": 3.2663876859688045e-05,
      "loss": 1.5606,
      "step": 62
    },
    {
      "epoch": 0.5313653136531366,
      "grad_norm": 0.013461374677717686,
      "learning_rate": 3.177721883681143e-05,
      "loss": 1.5631,
      "step": 63
    },
    {
      "epoch": 0.5397996837111229,
      "grad_norm": 0.014450161717832088,
      "learning_rate": 3.0888999834676796e-05,
      "loss": 1.5606,
      "step": 64
    },
    {
      "epoch": 0.5482340537691092,
      "grad_norm": 0.014033439569175243,
      "learning_rate": 3e-05,
      "loss": 1.5638,
      "step": 65
    },
    {
      "epoch": 0.5566684238270954,
      "grad_norm": 0.014029957354068756,
      "learning_rate": 2.9111000165323206e-05,
      "loss": 1.5656,
      "step": 66
    },
    {
      "epoch": 0.5651027938850817,
      "grad_norm": 0.016938265413045883,
      "learning_rate": 2.8222781163188573e-05,
      "loss": 1.5595,
      "step": 67
    },
    {
      "epoch": 0.573537163943068,
      "grad_norm": 0.014442404732108116,
      "learning_rate": 2.7336123140311957e-05,
      "loss": 1.5627,
      "step": 68
    },
    {
      "epoch": 0.5819715340010543,
      "grad_norm": 0.015609300695359707,
      "learning_rate": 2.645180487235903e-05,
      "loss": 1.5707,
      "step": 69
    },
    {
      "epoch": 0.5904059040590406,
      "grad_norm": 0.014037694782018661,
      "learning_rate": 2.557060307992557e-05,
      "loss": 1.5635,
      "step": 70
    },
    {
      "epoch": 0.5988402741170269,
      "grad_norm": 0.013035484589636326,
      "learning_rate": 2.469329174632282e-05,
      "loss": 1.5635,
      "step": 71
    },
    {
      "epoch": 0.6072746441750132,
      "grad_norm": 0.013149570673704147,
      "learning_rate": 2.3820641437767053e-05,
      "loss": 1.5607,
      "step": 72
    },
    {
      "epoch": 0.6157090142329995,
      "grad_norm": 0.01272524707019329,
      "learning_rate": 2.2953418626570494e-05,
      "loss": 1.5524,
      "step": 73
    },
    {
      "epoch": 0.6241433842909858,
      "grad_norm": 0.01219966635107994,
      "learning_rate": 2.209238501792798e-05,
      "loss": 1.555,
      "step": 74
    },
    {
      "epoch": 0.632577754348972,
      "grad_norm": 0.01229917537420988,
      "learning_rate": 2.123829688089073e-05,
      "loss": 1.5514,
      "step": 75
    },
    {
      "epoch": 0.6410121244069583,
      "grad_norm": 0.013784164562821388,
      "learning_rate": 2.0391904384114877e-05,
      "loss": 1.5614,
      "step": 76
    },
    {
      "epoch": 0.6494464944649446,
      "grad_norm": 0.010503321886062622,
      "learning_rate": 1.9553950936968042e-05,
      "loss": 1.541,
      "step": 77
    },
    {
      "epoch": 0.6578808645229309,
      "grad_norm": 0.012291346676647663,
      "learning_rate": 1.8725172536572863e-05,
      "loss": 1.556,
      "step": 78
    },
    {
      "epoch": 0.6663152345809172,
      "grad_norm": 0.011516911908984184,
      "learning_rate": 1.7906297121360838e-05,
      "loss": 1.5638,
      "step": 79
    },
    {
      "epoch": 0.6747496046389035,
      "grad_norm": 0.01181780081242323,
      "learning_rate": 1.7098043931704396e-05,
      "loss": 1.5508,
      "step": 80
    },
    {
      "epoch": 0.6831839746968899,
      "grad_norm": 0.010808738879859447,
      "learning_rate": 1.6301122878188607e-05,
      "loss": 1.5567,
      "step": 81
    },
    {
      "epoch": 0.6916183447548762,
      "grad_norm": 0.010649660602211952,
      "learning_rate": 1.551623391807766e-05,
      "loss": 1.5484,
      "step": 82
    },
    {
      "epoch": 0.7000527148128625,
      "grad_norm": 0.010580360889434814,
      "learning_rate": 1.4744066440523391e-05,
      "loss": 1.5591,
      "step": 83
    },
    {
      "epoch": 0.7084870848708487,
      "grad_norm": 0.010917909443378448,
      "learning_rate": 1.3985298661056292e-05,
      "loss": 1.569,
      "step": 84
    },
    {
      "epoch": 0.716921454928835,
      "grad_norm": 0.01177785824984312,
      "learning_rate": 1.324059702589043e-05,
      "loss": 1.5631,
      "step": 85
    },
    {
      "epoch": 0.7253558249868213,
      "grad_norm": 0.009857219643890858,
      "learning_rate": 1.2510615626565844e-05,
      "loss": 1.5561,
      "step": 86
    },
    {
      "epoch": 0.7337901950448076,
      "grad_norm": 0.011106839403510094,
      "learning_rate": 1.1795995625442208e-05,
      "loss": 1.5471,
      "step": 87
    },
    {
      "epoch": 0.7422245651027939,
      "grad_norm": 0.011377968825399876,
      "learning_rate": 1.109736469254867e-05,
      "loss": 1.5583,
      "step": 88
    },
    {
      "epoch": 0.7506589351607802,
      "grad_norm": 0.010118059813976288,
      "learning_rate": 1.0415336454284356e-05,
      "loss": 1.5531,
      "step": 89
    },
    {
      "epoch": 0.7590933052187665,
      "grad_norm": 0.01021275483071804,
      "learning_rate": 9.75050995445369e-06,
      "loss": 1.5559,
      "step": 90
    },
    {
      "epoch": 0.7675276752767528,
      "grad_norm": 0.00994526594877243,
      "learning_rate": 9.103469128110098e-06,
      "loss": 1.5527,
      "step": 91
    },
    {
      "epoch": 0.7759620453347391,
      "grad_norm": 0.01060432381927967,
      "learning_rate": 8.474782288670058e-06,
      "loss": 1.5514,
      "step": 92
    },
    {
      "epoch": 0.7843964153927253,
      "grad_norm": 0.011965557001531124,
      "learning_rate": 7.86500162874818e-06,
      "loss": 1.5536,
      "step": 93
    },
    {
      "epoch": 0.7928307854507116,
      "grad_norm": 0.010221057571470737,
      "learning_rate": 7.274662735151396e-06,
      "loss": 1.5541,
      "step": 94
    },
    {
      "epoch": 0.8012651555086979,
      "grad_norm": 0.01093184296041727,
      "learning_rate": 6.704284118458731e-06,
      "loss": 1.5512,
      "step": 95
    },
    {
      "epoch": 0.8096995255666842,
      "grad_norm": 0.010998157784342766,
      "learning_rate": 6.154366757599399e-06,
      "loss": 1.5492,
      "step": 96
    },
    {
      "epoch": 0.8181338956246705,
      "grad_norm": 0.01003272831439972,
      "learning_rate": 5.625393659829561e-06,
      "loss": 1.5472,
      "step": 97
    },
    {
      "epoch": 0.8265682656826568,
      "grad_norm": 0.010513346642255783,
      "learning_rate": 5.117829436493947e-06,
      "loss": 1.551,
      "step": 98
    },
    {
      "epoch": 0.8350026357406432,
      "grad_norm": 0.01016693189740181,
      "learning_rate": 4.632119894945215e-06,
      "loss": 1.5599,
      "step": 99
    },
    {
      "epoch": 0.8434370057986295,
      "grad_norm": 0.009756877087056637,
      "learning_rate": 4.1686916469793335e-06,
      "loss": 1.5552,
      "step": 100
    },
    {
      "epoch": 0.8518713758566157,
      "grad_norm": 0.010328919626772404,
      "learning_rate": 3.7279517341308977e-06,
      "loss": 1.5645,
      "step": 101
    },
    {
      "epoch": 0.860305745914602,
      "grad_norm": 0.009724525734782219,
      "learning_rate": 3.3102872701575838e-06,
      "loss": 1.5466,
      "step": 102
    },
    {
      "epoch": 0.8687401159725883,
      "grad_norm": 0.009452255442738533,
      "learning_rate": 2.916065101027694e-06,
      "loss": 1.555,
      "step": 103
    },
    {
      "epoch": 0.8771744860305746,
      "grad_norm": 0.009558911435306072,
      "learning_rate": 2.5456314827094463e-06,
      "loss": 1.5479,
      "step": 104
    },
    {
      "epoch": 0.8856088560885609,
      "grad_norm": 0.009129817597568035,
      "learning_rate": 2.1993117770449987e-06,
      "loss": 1.545,
      "step": 105
    },
    {
      "epoch": 0.8940432261465472,
      "grad_norm": 0.00930058490484953,
      "learning_rate": 1.8774101659763731e-06,
      "loss": 1.554,
      "step": 106
    },
    {
      "epoch": 0.9024775962045335,
      "grad_norm": 0.009718949906527996,
      "learning_rate": 1.5802093843742582e-06,
      "loss": 1.5467,
      "step": 107
    },
    {
      "epoch": 0.9109119662625198,
      "grad_norm": 0.009196877479553223,
      "learning_rate": 1.3079704717043273e-06,
      "loss": 1.55,
      "step": 108
    },
    {
      "epoch": 0.9193463363205061,
      "grad_norm": 0.00919976457953453,
      "learning_rate": 1.060932542749241e-06,
      "loss": 1.5558,
      "step": 109
    },
    {
      "epoch": 0.9277807063784923,
      "grad_norm": 0.0089542455971241,
      "learning_rate": 8.393125775876775e-07,
      "loss": 1.5563,
      "step": 110
    },
    {
      "epoch": 0.9362150764364786,
      "grad_norm": 0.009196256287395954,
      "learning_rate": 6.433052310148791e-07,
      "loss": 1.5537,
      "step": 111
    },
    {
      "epoch": 0.9446494464944649,
      "grad_norm": 0.009201628156006336,
      "learning_rate": 4.730826615720951e-07,
      "loss": 1.5567,
      "step": 112
    },
    {
      "epoch": 0.9530838165524512,
      "grad_norm": 0.008883966132998466,
      "learning_rate": 3.28794380335079e-07,
      "loss": 1.5549,
      "step": 113
    },
    {
      "epoch": 0.9615181866104375,
      "grad_norm": 0.009221088141202927,
      "learning_rate": 2.1056711959449247e-07,
      "loss": 1.5585,
      "step": 114
    },
    {
      "epoch": 0.9699525566684238,
      "grad_norm": 0.009092201478779316,
      "learning_rate": 1.1850472154349313e-07,
      "loss": 1.5536,
      "step": 115
    },
    {
      "epoch": 0.9783869267264101,
      "grad_norm": 0.009470025077462196,
      "learning_rate": 5.268804707035946e-08,
      "loss": 1.5705,
      "step": 116
    },
    {
      "epoch": 0.9868212967843965,
      "grad_norm": 0.008715336211025715,
      "learning_rate": 1.3174904736169557e-08,
      "loss": 1.5566,
      "step": 117
    },
    {
      "epoch": 0.9952556668423828,
      "grad_norm": 0.008857190608978271,
      "learning_rate": 0.0,
      "loss": 1.5464,
      "step": 118
    },
    {
      "epoch": 0.9952556668423828,
      "step": 118,
      "total_flos": 1660937136242688.0,
      "train_loss": 1.571211524939133,
      "train_runtime": 47361.2024,
      "train_samples_per_second": 0.641,
      "train_steps_per_second": 0.002
    }
  ],
  "logging_steps": 1,
  "max_steps": 118,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1660937136242688.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}