{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 216015,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006943962224845497,
      "grad_norm": 2.206239700317383,
      "learning_rate": 0.00029930699256996036,
      "loss": 4.6213,
      "step": 500
    },
    {
      "epoch": 0.013887924449690994,
      "grad_norm": 1.519214153289795,
      "learning_rate": 0.0002986125963474758,
      "loss": 4.5637,
      "step": 1000
    },
    {
      "epoch": 0.02083188667453649,
      "grad_norm": 2.3049542903900146,
      "learning_rate": 0.0002979182001249913,
      "loss": 4.514,
      "step": 1500
    },
    {
      "epoch": 0.027775848899381988,
      "grad_norm": 2.0959904193878174,
      "learning_rate": 0.00029722380390250673,
      "loss": 4.4586,
      "step": 2000
    },
    {
      "epoch": 0.034719811124227486,
      "grad_norm": 2.667476177215576,
      "learning_rate": 0.0002965294076800222,
      "loss": 4.4977,
      "step": 2500
    },
    {
      "epoch": 0.04166377334907298,
      "grad_norm": 1.8514024019241333,
      "learning_rate": 0.00029583501145753765,
      "loss": 4.4347,
      "step": 3000
    },
    {
      "epoch": 0.04860773557391848,
      "grad_norm": 1.47382390499115,
      "learning_rate": 0.00029514061523505306,
      "loss": 4.4555,
      "step": 3500
    },
    {
      "epoch": 0.055551697798763976,
      "grad_norm": 1.5439454317092896,
      "learning_rate": 0.0002944462190125685,
      "loss": 4.4132,
      "step": 4000
    },
    {
      "epoch": 0.06249566002360947,
      "grad_norm": 2.2414743900299072,
      "learning_rate": 0.000293751822790084,
      "loss": 4.4148,
      "step": 4500
    },
    {
      "epoch": 0.06943962224845497,
      "grad_norm": 2.522726058959961,
      "learning_rate": 0.00029305742656759944,
      "loss": 4.3732,
      "step": 5000
    },
    {
      "epoch": 0.07638358447330046,
      "grad_norm": 2.1512997150421143,
      "learning_rate": 0.0002923630303451149,
      "loss": 4.3563,
      "step": 5500
    },
    {
      "epoch": 0.08332754669814596,
      "grad_norm": 1.3137620687484741,
      "learning_rate": 0.00029166863412263035,
      "loss": 4.3309,
      "step": 6000
    },
    {
      "epoch": 0.09027150892299146,
      "grad_norm": 2.2317123413085938,
      "learning_rate": 0.0002909742379001458,
      "loss": 4.335,
      "step": 6500
    },
    {
      "epoch": 0.09721547114783696,
      "grad_norm": 1.4936091899871826,
      "learning_rate": 0.0002902798416776612,
      "loss": 4.3287,
      "step": 7000
    },
    {
      "epoch": 0.10415943337268245,
      "grad_norm": 1.4944448471069336,
      "learning_rate": 0.0002895854454551767,
      "loss": 4.2728,
      "step": 7500
    },
    {
      "epoch": 0.11110339559752795,
      "grad_norm": 2.103372573852539,
      "learning_rate": 0.00028889104923269214,
      "loss": 4.2945,
      "step": 8000
    },
    {
      "epoch": 0.11804735782237345,
      "grad_norm": 1.6695603132247925,
      "learning_rate": 0.0002881966530102076,
      "loss": 4.3126,
      "step": 8500
    },
    {
      "epoch": 0.12499132004721894,
      "grad_norm": 2.2535531520843506,
      "learning_rate": 0.00028750225678772306,
      "loss": 4.3014,
      "step": 9000
    },
    {
      "epoch": 0.13193528227206444,
      "grad_norm": 2.5216352939605713,
      "learning_rate": 0.0002868078605652385,
      "loss": 4.233,
      "step": 9500
    },
    {
      "epoch": 0.13887924449690994,
      "grad_norm": 2.3059794902801514,
      "learning_rate": 0.0002861134643427539,
      "loss": 4.2875,
      "step": 10000
    },
    {
      "epoch": 0.14582320672175544,
      "grad_norm": 2.7461228370666504,
      "learning_rate": 0.0002854190681202694,
      "loss": 4.2872,
      "step": 10500
    },
    {
      "epoch": 0.15276716894660092,
      "grad_norm": 1.7328039407730103,
      "learning_rate": 0.00028472467189778484,
      "loss": 4.2354,
      "step": 11000
    },
    {
      "epoch": 0.15971113117144642,
      "grad_norm": 1.904403805732727,
      "learning_rate": 0.0002840302756753003,
      "loss": 4.2339,
      "step": 11500
    },
    {
      "epoch": 0.16665509339629192,
      "grad_norm": 1.4885430335998535,
      "learning_rate": 0.00028333587945281576,
      "loss": 4.2214,
      "step": 12000
    },
    {
      "epoch": 0.17359905562113742,
      "grad_norm": 1.5213295221328735,
      "learning_rate": 0.0002826414832303312,
      "loss": 4.1642,
      "step": 12500
    },
    {
      "epoch": 0.18054301784598292,
      "grad_norm": 1.5499557256698608,
      "learning_rate": 0.0002819470870078466,
      "loss": 4.2083,
      "step": 13000
    },
    {
      "epoch": 0.18748698007082842,
      "grad_norm": 1.7087458372116089,
      "learning_rate": 0.0002812526907853621,
      "loss": 4.192,
      "step": 13500
    },
    {
      "epoch": 0.19443094229567393,
      "grad_norm": 1.4186779260635376,
      "learning_rate": 0.00028055829456287754,
      "loss": 4.1686,
      "step": 14000
    },
    {
      "epoch": 0.2013749045205194,
      "grad_norm": 1.7402822971343994,
      "learning_rate": 0.000279863898340393,
      "loss": 4.1988,
      "step": 14500
    },
    {
      "epoch": 0.2083188667453649,
      "grad_norm": 1.821722149848938,
      "learning_rate": 0.00027916950211790846,
      "loss": 4.1941,
      "step": 15000
    },
    {
      "epoch": 0.2152628289702104,
      "grad_norm": 1.207729458808899,
      "learning_rate": 0.0002784751058954239,
      "loss": 4.1868,
      "step": 15500
    },
    {
      "epoch": 0.2222067911950559,
      "grad_norm": 1.804909110069275,
      "learning_rate": 0.0002777807096729393,
      "loss": 4.1657,
      "step": 16000
    },
    {
      "epoch": 0.2291507534199014,
      "grad_norm": 2.126279830932617,
      "learning_rate": 0.0002770863134504548,
      "loss": 4.1083,
      "step": 16500
    },
    {
      "epoch": 0.2360947156447469,
      "grad_norm": 1.3076670169830322,
      "learning_rate": 0.00027639191722797024,
      "loss": 4.1446,
      "step": 17000
    },
    {
      "epoch": 0.24303867786959238,
      "grad_norm": 2.152813196182251,
      "learning_rate": 0.0002756975210054857,
      "loss": 4.1444,
      "step": 17500
    },
    {
      "epoch": 0.24998264009443788,
      "grad_norm": 1.9263052940368652,
      "learning_rate": 0.00027500312478300116,
      "loss": 4.1581,
      "step": 18000
    },
    {
      "epoch": 0.2569266023192834,
      "grad_norm": 1.7251839637756348,
      "learning_rate": 0.0002743087285605166,
      "loss": 4.1468,
      "step": 18500
    },
    {
      "epoch": 0.2638705645441289,
      "grad_norm": 1.2336386442184448,
      "learning_rate": 0.0002736143323380321,
      "loss": 4.1253,
      "step": 19000
    },
    {
      "epoch": 0.2708145267689744,
      "grad_norm": 1.2728581428527832,
      "learning_rate": 0.0002729199361155475,
      "loss": 4.0935,
      "step": 19500
    },
    {
      "epoch": 0.2777584889938199,
      "grad_norm": 1.6062270402908325,
      "learning_rate": 0.00027222553989306294,
      "loss": 4.1383,
      "step": 20000
    },
    {
      "epoch": 0.2847024512186654,
      "grad_norm": 1.4133198261260986,
      "learning_rate": 0.0002715311436705784,
      "loss": 4.1044,
      "step": 20500
    },
    {
      "epoch": 0.2916464134435109,
      "grad_norm": 1.421473741531372,
      "learning_rate": 0.00027083674744809386,
      "loss": 4.103,
      "step": 21000
    },
    {
      "epoch": 0.2985903756683564,
      "grad_norm": 1.92391836643219,
      "learning_rate": 0.0002701423512256093,
      "loss": 4.0771,
      "step": 21500
    },
    {
      "epoch": 0.30553433789320184,
      "grad_norm": 2.6180472373962402,
      "learning_rate": 0.0002694479550031248,
      "loss": 4.0713,
      "step": 22000
    },
    {
      "epoch": 0.31247830011804734,
      "grad_norm": 2.3145902156829834,
      "learning_rate": 0.0002687535587806402,
      "loss": 4.0287,
      "step": 22500
    },
    {
      "epoch": 0.31942226234289284,
      "grad_norm": 1.9222602844238281,
      "learning_rate": 0.00026805916255815565,
      "loss": 4.0714,
      "step": 23000
    },
    {
      "epoch": 0.32636622456773834,
      "grad_norm": 1.6106317043304443,
      "learning_rate": 0.0002673647663356711,
      "loss": 4.0375,
      "step": 23500
    },
    {
      "epoch": 0.33331018679258384,
      "grad_norm": 1.6297123432159424,
      "learning_rate": 0.00026667037011318657,
      "loss": 4.0668,
      "step": 24000
    },
    {
      "epoch": 0.34025414901742934,
      "grad_norm": 2.0038726329803467,
      "learning_rate": 0.000265975973890702,
      "loss": 4.0266,
      "step": 24500
    },
    {
      "epoch": 0.34719811124227484,
      "grad_norm": 1.7728261947631836,
      "learning_rate": 0.0002652815776682175,
      "loss": 4.0499,
      "step": 25000
    },
    {
      "epoch": 0.35414207346712034,
      "grad_norm": 1.9266184568405151,
      "learning_rate": 0.0002645871814457329,
      "loss": 4.0055,
      "step": 25500
    },
    {
      "epoch": 0.36108603569196585,
      "grad_norm": 3.5189244747161865,
      "learning_rate": 0.00026389278522324835,
      "loss": 4.004,
      "step": 26000
    },
    {
      "epoch": 0.36802999791681135,
      "grad_norm": 1.9975138902664185,
      "learning_rate": 0.0002631983890007638,
      "loss": 3.9998,
      "step": 26500
    },
    {
      "epoch": 0.37497396014165685,
      "grad_norm": 3.087763547897339,
      "learning_rate": 0.00026250399277827927,
      "loss": 4.0146,
      "step": 27000
    },
    {
      "epoch": 0.38191792236650235,
      "grad_norm": 1.297499179840088,
      "learning_rate": 0.0002618095965557947,
      "loss": 4.0206,
      "step": 27500
    },
    {
      "epoch": 0.38886188459134785,
      "grad_norm": 1.4603493213653564,
      "learning_rate": 0.0002611152003333102,
      "loss": 4.0337,
      "step": 28000
    },
    {
      "epoch": 0.3958058468161933,
      "grad_norm": 1.5912282466888428,
      "learning_rate": 0.0002604208041108256,
      "loss": 3.9878,
      "step": 28500
    },
    {
      "epoch": 0.4027498090410388,
      "grad_norm": 1.4256983995437622,
      "learning_rate": 0.00025972640788834105,
      "loss": 4.0107,
      "step": 29000
    },
    {
      "epoch": 0.4096937712658843,
      "grad_norm": 1.6172006130218506,
      "learning_rate": 0.0002590320116658565,
      "loss": 4.0011,
      "step": 29500
    },
    {
      "epoch": 0.4166377334907298,
      "grad_norm": 2.2637939453125,
      "learning_rate": 0.00025833761544337197,
      "loss": 4.0057,
      "step": 30000
    },
    {
      "epoch": 0.4235816957155753,
      "grad_norm": 1.6595959663391113,
      "learning_rate": 0.00025764321922088743,
      "loss": 4.0112,
      "step": 30500
    },
    {
      "epoch": 0.4305256579404208,
      "grad_norm": 1.7675671577453613,
      "learning_rate": 0.0002569488229984029,
      "loss": 3.9575,
      "step": 31000
    },
    {
      "epoch": 0.4374696201652663,
      "grad_norm": 1.9230527877807617,
      "learning_rate": 0.00025625442677591835,
      "loss": 3.9734,
      "step": 31500
    },
    {
      "epoch": 0.4444135823901118,
      "grad_norm": 1.6587070226669312,
      "learning_rate": 0.00025556003055343375,
      "loss": 3.973,
      "step": 32000
    },
    {
      "epoch": 0.4513575446149573,
      "grad_norm": 1.8445744514465332,
      "learning_rate": 0.0002548656343309492,
      "loss": 3.9792,
      "step": 32500
    },
    {
      "epoch": 0.4583015068398028,
      "grad_norm": 2.5224626064300537,
      "learning_rate": 0.00025417123810846467,
      "loss": 3.9501,
      "step": 33000
    },
    {
      "epoch": 0.4652454690646483,
      "grad_norm": 1.8237272500991821,
      "learning_rate": 0.00025347684188598013,
      "loss": 3.9696,
      "step": 33500
    },
    {
      "epoch": 0.4721894312894938,
      "grad_norm": 3.2028214931488037,
      "learning_rate": 0.0002527824456634956,
      "loss": 3.9386,
      "step": 34000
    },
    {
      "epoch": 0.47913339351433926,
      "grad_norm": 1.7500147819519043,
      "learning_rate": 0.00025208804944101105,
      "loss": 3.9328,
      "step": 34500
    },
    {
      "epoch": 0.48607735573918476,
      "grad_norm": 1.8961389064788818,
      "learning_rate": 0.00025139365321852645,
      "loss": 3.9132,
      "step": 35000
    },
    {
      "epoch": 0.49302131796403026,
      "grad_norm": 1.3247839212417603,
      "learning_rate": 0.0002506992569960419,
      "loss": 3.9139,
      "step": 35500
    },
    {
      "epoch": 0.49996528018887576,
      "grad_norm": 1.6709811687469482,
      "learning_rate": 0.00025000486077355737,
      "loss": 3.9646,
      "step": 36000
    },
    {
      "epoch": 0.5069092424137213,
      "grad_norm": 1.717537760734558,
      "learning_rate": 0.00024931046455107283,
      "loss": 3.9328,
      "step": 36500
    },
    {
      "epoch": 0.5138532046385668,
      "grad_norm": 2.0001790523529053,
      "learning_rate": 0.0002486160683285883,
      "loss": 3.9522,
      "step": 37000
    },
    {
      "epoch": 0.5207971668634123,
      "grad_norm": 1.53634774684906,
      "learning_rate": 0.00024792167210610375,
      "loss": 3.8908,
      "step": 37500
    },
    {
      "epoch": 0.5277411290882578,
      "grad_norm": 1.6681393384933472,
      "learning_rate": 0.00024722727588361916,
      "loss": 3.9057,
      "step": 38000
    },
    {
      "epoch": 0.5346850913131033,
      "grad_norm": 1.4480671882629395,
      "learning_rate": 0.0002465328796611346,
      "loss": 3.9144,
      "step": 38500
    },
    {
      "epoch": 0.5416290535379488,
      "grad_norm": 2.7067551612854004,
      "learning_rate": 0.0002458384834386501,
      "loss": 3.9379,
      "step": 39000
    },
    {
      "epoch": 0.5485730157627943,
      "grad_norm": 1.919639229774475,
      "learning_rate": 0.00024514408721616553,
      "loss": 3.8559,
      "step": 39500
    },
    {
      "epoch": 0.5555169779876398,
      "grad_norm": 1.9291149377822876,
      "learning_rate": 0.000244449690993681,
      "loss": 3.8821,
      "step": 40000
    },
    {
      "epoch": 0.5624609402124853,
      "grad_norm": 2.4123694896698,
      "learning_rate": 0.00024375529477119642,
      "loss": 3.9055,
      "step": 40500
    },
    {
      "epoch": 0.5694049024373308,
      "grad_norm": 1.5772641897201538,
      "learning_rate": 0.00024306089854871186,
      "loss": 3.9045,
      "step": 41000
    },
    {
      "epoch": 0.5763488646621763,
      "grad_norm": 2.2179367542266846,
      "learning_rate": 0.00024236650232622732,
      "loss": 3.9022,
      "step": 41500
    },
    {
      "epoch": 0.5832928268870218,
      "grad_norm": 1.1816768646240234,
      "learning_rate": 0.00024167210610374278,
      "loss": 3.8836,
      "step": 42000
    },
    {
      "epoch": 0.5902367891118673,
      "grad_norm": 1.7630631923675537,
      "learning_rate": 0.00024097770988125823,
      "loss": 3.8657,
      "step": 42500
    },
    {
      "epoch": 0.5971807513367128,
      "grad_norm": 1.777095913887024,
      "learning_rate": 0.00024028331365877367,
      "loss": 3.8529,
      "step": 43000
    },
    {
      "epoch": 0.6041247135615583,
      "grad_norm": 1.7181869745254517,
      "learning_rate": 0.00023958891743628913,
      "loss": 3.8781,
      "step": 43500
    },
    {
      "epoch": 0.6110686757864037,
      "grad_norm": 1.855504035949707,
      "learning_rate": 0.00023889452121380459,
      "loss": 3.8391,
      "step": 44000
    },
    {
      "epoch": 0.6180126380112492,
      "grad_norm": 1.7183347940444946,
      "learning_rate": 0.00023820012499132002,
      "loss": 3.836,
      "step": 44500
    },
    {
      "epoch": 0.6249566002360947,
      "grad_norm": 1.8155463933944702,
      "learning_rate": 0.00023750572876883548,
      "loss": 3.8238,
      "step": 45000
    },
    {
      "epoch": 0.6319005624609402,
      "grad_norm": 1.4531205892562866,
      "learning_rate": 0.00023681133254635094,
      "loss": 3.8348,
      "step": 45500
    },
    {
      "epoch": 0.6388445246857857,
      "grad_norm": 2.0805277824401855,
      "learning_rate": 0.00023611693632386637,
      "loss": 3.8441,
      "step": 46000
    },
    {
      "epoch": 0.6457884869106312,
      "grad_norm": 2.2948648929595947,
      "learning_rate": 0.00023542254010138183,
      "loss": 3.8511,
      "step": 46500
    },
    {
      "epoch": 0.6527324491354767,
      "grad_norm": 1.958189606666565,
      "learning_rate": 0.0002347281438788973,
      "loss": 3.8678,
      "step": 47000
    },
    {
      "epoch": 0.6596764113603222,
      "grad_norm": 1.9029563665390015,
      "learning_rate": 0.00023403374765641272,
      "loss": 3.8204,
      "step": 47500
    },
    {
      "epoch": 0.6666203735851677,
      "grad_norm": 1.6925806999206543,
      "learning_rate": 0.00023333935143392818,
      "loss": 3.861,
      "step": 48000
    },
    {
      "epoch": 0.6735643358100132,
      "grad_norm": 2.417433023452759,
      "learning_rate": 0.00023264495521144364,
      "loss": 3.8151,
      "step": 48500
    },
    {
      "epoch": 0.6805082980348587,
      "grad_norm": 1.94263756275177,
      "learning_rate": 0.00023195055898895907,
      "loss": 3.8351,
      "step": 49000
    },
    {
      "epoch": 0.6874522602597042,
      "grad_norm": 2.0970757007598877,
      "learning_rate": 0.00023125616276647453,
      "loss": 3.8728,
      "step": 49500
    },
    {
      "epoch": 0.6943962224845497,
      "grad_norm": 1.8286621570587158,
      "learning_rate": 0.00023056176654399,
      "loss": 3.8262,
      "step": 50000
    },
    {
      "epoch": 0.7013401847093952,
      "grad_norm": 1.3233591318130493,
      "learning_rate": 0.00022986737032150542,
      "loss": 3.8186,
      "step": 50500
    },
    {
      "epoch": 0.7082841469342407,
      "grad_norm": 1.760081171989441,
      "learning_rate": 0.00022917297409902088,
      "loss": 3.8268,
      "step": 51000
    },
    {
      "epoch": 0.7152281091590862,
      "grad_norm": 2.040560722351074,
      "learning_rate": 0.00022847857787653634,
      "loss": 3.8425,
      "step": 51500
    },
    {
      "epoch": 0.7221720713839317,
      "grad_norm": 2.493685007095337,
      "learning_rate": 0.00022778418165405177,
      "loss": 3.8362,
      "step": 52000
    },
    {
      "epoch": 0.7291160336087772,
      "grad_norm": 1.7292836904525757,
      "learning_rate": 0.00022708978543156723,
      "loss": 3.8317,
      "step": 52500
    },
    {
      "epoch": 0.7360599958336227,
      "grad_norm": 2.2565951347351074,
      "learning_rate": 0.0002263953892090827,
      "loss": 3.7835,
      "step": 53000
    },
    {
      "epoch": 0.7430039580584682,
      "grad_norm": 1.6440356969833374,
      "learning_rate": 0.00022570099298659812,
      "loss": 3.8079,
      "step": 53500
    },
    {
      "epoch": 0.7499479202833137,
      "grad_norm": 1.8633214235305786,
      "learning_rate": 0.00022500659676411358,
      "loss": 3.803,
      "step": 54000
    },
    {
      "epoch": 0.7568918825081592,
      "grad_norm": 2.401519775390625,
      "learning_rate": 0.00022431220054162904,
      "loss": 3.7996,
      "step": 54500
    },
    {
      "epoch": 0.7638358447330047,
      "grad_norm": 1.3482192754745483,
      "learning_rate": 0.0002236178043191445,
      "loss": 3.7957,
      "step": 55000
    },
    {
      "epoch": 0.7707798069578502,
      "grad_norm": 2.4375321865081787,
      "learning_rate": 0.00022292340809665993,
      "loss": 3.8011,
      "step": 55500
    },
    {
      "epoch": 0.7777237691826957,
      "grad_norm": 1.4207526445388794,
      "learning_rate": 0.0002222290118741754,
      "loss": 3.7809,
      "step": 56000
    },
    {
      "epoch": 0.7846677314075411,
      "grad_norm": 2.166013717651367,
      "learning_rate": 0.00022153461565169085,
      "loss": 3.8016,
      "step": 56500
    },
    {
      "epoch": 0.7916116936323866,
      "grad_norm": 1.4218441247940063,
      "learning_rate": 0.00022084021942920628,
      "loss": 3.7636,
      "step": 57000
    },
    {
      "epoch": 0.7985556558572321,
      "grad_norm": 1.5661506652832031,
      "learning_rate": 0.00022014582320672174,
      "loss": 3.7958,
      "step": 57500
    },
    {
      "epoch": 0.8054996180820776,
      "grad_norm": 1.311798095703125,
      "learning_rate": 0.0002194514269842372,
      "loss": 3.7593,
      "step": 58000
    },
    {
      "epoch": 0.8124435803069231,
      "grad_norm": 1.3802398443222046,
      "learning_rate": 0.00021875703076175264,
      "loss": 3.7758,
      "step": 58500
    },
    {
      "epoch": 0.8193875425317686,
      "grad_norm": 1.7688322067260742,
      "learning_rate": 0.0002180626345392681,
      "loss": 3.7685,
      "step": 59000
    },
    {
      "epoch": 0.8263315047566141,
      "grad_norm": 1.8496917486190796,
      "learning_rate": 0.00021736823831678355,
      "loss": 3.8083,
      "step": 59500
    },
    {
      "epoch": 0.8332754669814596,
      "grad_norm": 1.2840275764465332,
      "learning_rate": 0.00021667384209429899,
      "loss": 3.7912,
      "step": 60000
    },
    {
      "epoch": 0.8402194292063051,
      "grad_norm": 1.4152112007141113,
      "learning_rate": 0.00021597944587181445,
      "loss": 3.75,
      "step": 60500
    },
    {
      "epoch": 0.8471633914311506,
      "grad_norm": 2.001692771911621,
      "learning_rate": 0.0002152850496493299,
      "loss": 3.7748,
      "step": 61000
    },
    {
      "epoch": 0.8541073536559961,
      "grad_norm": 2.6924116611480713,
      "learning_rate": 0.00021459065342684534,
      "loss": 3.7576,
      "step": 61500
    },
    {
      "epoch": 0.8610513158808416,
      "grad_norm": 1.5775929689407349,
      "learning_rate": 0.0002138962572043608,
      "loss": 3.7357,
      "step": 62000
    },
    {
      "epoch": 0.8679952781056871,
      "grad_norm": 2.122657060623169,
      "learning_rate": 0.00021320186098187626,
      "loss": 3.7513,
      "step": 62500
    },
    {
      "epoch": 0.8749392403305326,
      "grad_norm": 1.8863738775253296,
      "learning_rate": 0.0002125074647593917,
      "loss": 3.7259,
      "step": 63000
    },
    {
      "epoch": 0.8818832025553781,
      "grad_norm": 1.46346914768219,
      "learning_rate": 0.00021181306853690715,
      "loss": 3.7851,
      "step": 63500
    },
    {
      "epoch": 0.8888271647802236,
      "grad_norm": 2.3657708168029785,
      "learning_rate": 0.0002111186723144226,
      "loss": 3.7523,
      "step": 64000
    },
    {
      "epoch": 0.8957711270050691,
      "grad_norm": 1.5897114276885986,
      "learning_rate": 0.00021042427609193804,
      "loss": 3.7526,
      "step": 64500
    },
    {
      "epoch": 0.9027150892299146,
      "grad_norm": 1.8869891166687012,
      "learning_rate": 0.0002097298798694535,
      "loss": 3.7406,
      "step": 65000
    },
    {
      "epoch": 0.9096590514547601,
      "grad_norm": 1.891735315322876,
      "learning_rate": 0.00020903548364696896,
      "loss": 3.7229,
      "step": 65500
    },
    {
      "epoch": 0.9166030136796056,
      "grad_norm": 1.4305230379104614,
      "learning_rate": 0.0002083410874244844,
      "loss": 3.7712,
      "step": 66000
    },
    {
      "epoch": 0.9235469759044511,
      "grad_norm": 1.571385145187378,
      "learning_rate": 0.00020764669120199985,
      "loss": 3.739,
      "step": 66500
    },
    {
      "epoch": 0.9304909381292966,
      "grad_norm": 1.64103102684021,
      "learning_rate": 0.0002069522949795153,
      "loss": 3.7291,
      "step": 67000
    },
    {
      "epoch": 0.9374349003541421,
      "grad_norm": 1.683289647102356,
      "learning_rate": 0.00020625789875703077,
      "loss": 3.7349,
      "step": 67500
    },
    {
      "epoch": 0.9443788625789876,
      "grad_norm": 1.9319536685943604,
      "learning_rate": 0.0002055635025345462,
      "loss": 3.7298,
      "step": 68000
    },
    {
      "epoch": 0.9513228248038331,
      "grad_norm": 1.2139371633529663,
      "learning_rate": 0.00020486910631206166,
      "loss": 3.7077,
      "step": 68500
    },
    {
      "epoch": 0.9582667870286785,
      "grad_norm": 2.366407871246338,
      "learning_rate": 0.00020417471008957712,
      "loss": 3.7156,
      "step": 69000
    },
    {
      "epoch": 0.965210749253524,
      "grad_norm": 1.2618952989578247,
      "learning_rate": 0.00020348031386709255,
      "loss": 3.6964,
      "step": 69500
    },
    {
      "epoch": 0.9721547114783695,
      "grad_norm": 1.3639748096466064,
      "learning_rate": 0.000202785917644608,
      "loss": 3.7082,
      "step": 70000
    },
    {
      "epoch": 0.979098673703215,
      "grad_norm": 1.7581994533538818,
      "learning_rate": 0.00020209152142212347,
      "loss": 3.7073,
      "step": 70500
    },
    {
      "epoch": 0.9860426359280605,
      "grad_norm": 2.42798113822937,
      "learning_rate": 0.00020139712519963887,
      "loss": 3.7116,
      "step": 71000
    },
    {
      "epoch": 0.992986598152906,
      "grad_norm": 1.6432359218597412,
      "learning_rate": 0.00020070272897715436,
      "loss": 3.6997,
      "step": 71500
    },
    {
      "epoch": 0.9999305603777515,
      "grad_norm": 1.079620361328125,
      "learning_rate": 0.00020000833275466982,
      "loss": 3.6916,
      "step": 72000
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.4631764888763428,
      "eval_rouge1": 0.04941977533882873,
      "eval_rouge2": 0.009172388840507199,
      "eval_rougeL": 0.04889121785661922,
      "eval_rougeLsum": 0.04914022943695945,
      "eval_runtime": 3964.6307,
      "eval_samples_per_second": 4.036,
      "eval_steps_per_second": 2.018,
      "step": 72005
    },
    {
      "epoch": 1.006874522602597,
      "grad_norm": 1.7696194648742676,
      "learning_rate": 0.00019931393653218523,
      "loss": 3.6153,
      "step": 72500
    },
    {
      "epoch": 1.0138184848274425,
      "grad_norm": 1.091058611869812,
      "learning_rate": 0.00019861954030970068,
      "loss": 3.6146,
      "step": 73000
    },
    {
      "epoch": 1.020762447052288,
      "grad_norm": 1.8638333082199097,
      "learning_rate": 0.00019792514408721617,
      "loss": 3.609,
      "step": 73500
    },
    {
      "epoch": 1.0277064092771335,
      "grad_norm": 2.0278666019439697,
      "learning_rate": 0.00019723074786473158,
      "loss": 3.6129,
      "step": 74000
    },
    {
      "epoch": 1.034650371501979,
      "grad_norm": 2.3207359313964844,
      "learning_rate": 0.00019653635164224704,
      "loss": 3.601,
      "step": 74500
    },
    {
      "epoch": 1.0415943337268245,
      "grad_norm": 1.4969432353973389,
      "learning_rate": 0.00019584195541976252,
      "loss": 3.604,
      "step": 75000
    },
    {
      "epoch": 1.04853829595167,
      "grad_norm": 1.567814588546753,
      "learning_rate": 0.00019514755919727793,
      "loss": 3.5989,
      "step": 75500
    },
    {
      "epoch": 1.0554822581765155,
      "grad_norm": 1.7745885848999023,
      "learning_rate": 0.00019445316297479339,
      "loss": 3.5884,
      "step": 76000
    },
    {
      "epoch": 1.062426220401361,
      "grad_norm": 1.9692455530166626,
      "learning_rate": 0.00019375876675230885,
      "loss": 3.6422,
      "step": 76500
    },
    {
      "epoch": 1.0693701826262065,
      "grad_norm": 1.5432820320129395,
      "learning_rate": 0.00019306437052982428,
      "loss": 3.5908,
      "step": 77000
    },
    {
      "epoch": 1.076314144851052,
      "grad_norm": 1.8240996599197388,
      "learning_rate": 0.00019236997430733974,
      "loss": 3.6187,
      "step": 77500
    },
    {
      "epoch": 1.0832581070758975,
      "grad_norm": 1.2577378749847412,
      "learning_rate": 0.0001916755780848552,
      "loss": 3.537,
      "step": 78000
    },
    {
      "epoch": 1.090202069300743,
      "grad_norm": 1.969597339630127,
      "learning_rate": 0.00019098118186237063,
      "loss": 3.6229,
      "step": 78500
    },
    {
      "epoch": 1.0971460315255885,
      "grad_norm": 2.464380979537964,
      "learning_rate": 0.0001902867856398861,
      "loss": 3.6009,
      "step": 79000
    },
    {
      "epoch": 1.104089993750434,
      "grad_norm": 2.1590375900268555,
      "learning_rate": 0.00018959238941740155,
      "loss": 3.5834,
      "step": 79500
    },
    {
      "epoch": 1.1110339559752795,
      "grad_norm": 1.2929880619049072,
      "learning_rate": 0.000188897993194917,
      "loss": 3.605,
      "step": 80000
    },
    {
      "epoch": 1.117977918200125,
      "grad_norm": 2.7584567070007324,
      "learning_rate": 0.00018820359697243244,
      "loss": 3.5863,
      "step": 80500
    },
    {
      "epoch": 1.1249218804249705,
      "grad_norm": 2.8105475902557373,
      "learning_rate": 0.0001875092007499479,
      "loss": 3.5646,
      "step": 81000
    },
    {
      "epoch": 1.131865842649816,
      "grad_norm": 1.6573877334594727,
      "learning_rate": 0.00018681480452746336,
      "loss": 3.6216,
      "step": 81500
    },
    {
      "epoch": 1.1388098048746615,
      "grad_norm": 1.3650224208831787,
      "learning_rate": 0.0001861204083049788,
      "loss": 3.6155,
      "step": 82000
    },
    {
      "epoch": 1.145753767099507,
      "grad_norm": 1.3206992149353027,
      "learning_rate": 0.00018542601208249425,
      "loss": 3.62,
      "step": 82500
    },
    {
      "epoch": 1.1526977293243525,
      "grad_norm": 2.5373497009277344,
      "learning_rate": 0.0001847316158600097,
      "loss": 3.595,
      "step": 83000
    },
    {
      "epoch": 1.159641691549198,
      "grad_norm": 1.503808856010437,
      "learning_rate": 0.00018403721963752514,
      "loss": 3.6213,
      "step": 83500
    },
    {
      "epoch": 1.1665856537740436,
      "grad_norm": 2.725497007369995,
      "learning_rate": 0.0001833428234150406,
      "loss": 3.6006,
      "step": 84000
    },
    {
      "epoch": 1.173529615998889,
      "grad_norm": 1.50645112991333,
      "learning_rate": 0.00018264842719255606,
      "loss": 3.6263,
      "step": 84500
    },
    {
      "epoch": 1.1804735782237346,
      "grad_norm": 1.8035708665847778,
      "learning_rate": 0.0001819540309700715,
      "loss": 3.5539,
      "step": 85000
    },
    {
      "epoch": 1.18741754044858,
      "grad_norm": 1.9327325820922852,
      "learning_rate": 0.00018125963474758695,
      "loss": 3.5822,
      "step": 85500
    },
    {
      "epoch": 1.1943615026734253,
      "grad_norm": 1.14821195602417,
      "learning_rate": 0.0001805652385251024,
      "loss": 3.6075,
      "step": 86000
    },
    {
      "epoch": 1.201305464898271,
      "grad_norm": 1.6110094785690308,
      "learning_rate": 0.00017987084230261784,
      "loss": 3.5879,
      "step": 86500
    },
    {
      "epoch": 1.2082494271231163,
      "grad_norm": 1.9771841764450073,
      "learning_rate": 0.0001791764460801333,
      "loss": 3.5976,
      "step": 87000
    },
    {
      "epoch": 1.215193389347962,
      "grad_norm": 1.6200594902038574,
      "learning_rate": 0.00017848204985764876,
      "loss": 3.5788,
      "step": 87500
    },
    {
      "epoch": 1.2221373515728073,
      "grad_norm": 1.9356050491333008,
      "learning_rate": 0.0001777876536351642,
      "loss": 3.5611,
      "step": 88000
    },
    {
      "epoch": 1.229081313797653,
      "grad_norm": 2.2079310417175293,
      "learning_rate": 0.00017709325741267965,
      "loss": 3.5828,
      "step": 88500
    },
    {
      "epoch": 1.2360252760224983,
      "grad_norm": 1.6722863912582397,
      "learning_rate": 0.0001763988611901951,
      "loss": 3.5509,
      "step": 89000
    },
    {
      "epoch": 1.2429692382473438,
      "grad_norm": 1.2027846574783325,
      "learning_rate": 0.00017570446496771054,
      "loss": 3.5512,
      "step": 89500
    },
    {
      "epoch": 1.2499132004721893,
      "grad_norm": 3.1451849937438965,
      "learning_rate": 0.000175010068745226,
      "loss": 3.5704,
      "step": 90000
    },
    {
      "epoch": 1.2568571626970348,
      "grad_norm": 1.64677095413208,
      "learning_rate": 0.00017431567252274146,
      "loss": 3.5879,
      "step": 90500
    },
    {
      "epoch": 1.2638011249218803,
      "grad_norm": 1.2925798892974854,
      "learning_rate": 0.0001736212763002569,
      "loss": 3.5499,
      "step": 91000
    },
    {
      "epoch": 1.2707450871467258,
      "grad_norm": 1.2768690586090088,
      "learning_rate": 0.00017292688007777235,
      "loss": 3.5538,
      "step": 91500
    },
    {
      "epoch": 1.2776890493715714,
      "grad_norm": 2.6654281616210938,
      "learning_rate": 0.0001722324838552878,
      "loss": 3.5431,
      "step": 92000
    },
    {
      "epoch": 1.2846330115964169,
      "grad_norm": 1.8698071241378784,
      "learning_rate": 0.00017153808763280327,
      "loss": 3.5646,
      "step": 92500
    },
    {
      "epoch": 1.2915769738212624,
      "grad_norm": 1.6036585569381714,
      "learning_rate": 0.0001708436914103187,
      "loss": 3.5595,
      "step": 93000
    },
    {
      "epoch": 1.2985209360461079,
      "grad_norm": 1.4960416555404663,
      "learning_rate": 0.00017014929518783416,
      "loss": 3.549,
      "step": 93500
    },
    {
      "epoch": 1.3054648982709534,
      "grad_norm": 1.5892603397369385,
      "learning_rate": 0.00016945489896534962,
      "loss": 3.5144,
      "step": 94000
    },
    {
      "epoch": 1.3124088604957989,
      "grad_norm": 1.2791684865951538,
      "learning_rate": 0.00016876050274286506,
      "loss": 3.5494,
      "step": 94500
    },
    {
      "epoch": 1.3193528227206444,
      "grad_norm": 4.176353454589844,
      "learning_rate": 0.00016806610652038052,
      "loss": 3.5705,
      "step": 95000
    },
    {
      "epoch": 1.3262967849454899,
      "grad_norm": 1.6479995250701904,
      "learning_rate": 0.00016737171029789597,
      "loss": 3.5644,
      "step": 95500
    },
    {
      "epoch": 1.3332407471703354,
      "grad_norm": 1.6295863389968872,
      "learning_rate": 0.0001666773140754114,
      "loss": 3.5614,
      "step": 96000
    },
    {
      "epoch": 1.3401847093951809,
      "grad_norm": 2.4127180576324463,
      "learning_rate": 0.00016598291785292687,
      "loss": 3.5488,
      "step": 96500
    },
    {
      "epoch": 1.3471286716200264,
      "grad_norm": 2.4236507415771484,
      "learning_rate": 0.00016528852163044233,
      "loss": 3.5188,
      "step": 97000
    },
    {
      "epoch": 1.3540726338448719,
      "grad_norm": 1.2415298223495483,
      "learning_rate": 0.00016459412540795776,
      "loss": 3.5843,
      "step": 97500
    },
    {
      "epoch": 1.3610165960697174,
      "grad_norm": 2.39335298538208,
      "learning_rate": 0.00016389972918547322,
      "loss": 3.5445,
      "step": 98000
    },
    {
      "epoch": 1.3679605582945629,
      "grad_norm": 1.481112003326416,
      "learning_rate": 0.00016320533296298868,
      "loss": 3.5725,
      "step": 98500
    },
    {
      "epoch": 1.3749045205194084,
      "grad_norm": 1.8762099742889404,
      "learning_rate": 0.0001625109367405041,
      "loss": 3.5422,
      "step": 99000
    },
    {
      "epoch": 1.3818484827442539,
      "grad_norm": 1.4844539165496826,
      "learning_rate": 0.00016181654051801957,
      "loss": 3.5469,
      "step": 99500
    },
    {
      "epoch": 1.3887924449690994,
      "grad_norm": 1.776289701461792,
      "learning_rate": 0.00016112214429553503,
      "loss": 3.5329,
      "step": 100000
    },
    {
      "epoch": 1.3957364071939449,
      "grad_norm": 1.566076636314392,
      "learning_rate": 0.00016042774807305046,
      "loss": 3.5146,
      "step": 100500
    },
    {
      "epoch": 1.4026803694187904,
      "grad_norm": 1.8773123025894165,
      "learning_rate": 0.00015973335185056592,
      "loss": 3.5298,
      "step": 101000
    },
    {
      "epoch": 1.4096243316436359,
      "grad_norm": 1.92935049533844,
      "learning_rate": 0.00015903895562808138,
      "loss": 3.5298,
      "step": 101500
    },
    {
      "epoch": 1.4165682938684814,
      "grad_norm": 1.807790994644165,
      "learning_rate": 0.0001583445594055968,
      "loss": 3.5586,
      "step": 102000
    },
    {
      "epoch": 1.4235122560933269,
      "grad_norm": 1.67229425907135,
      "learning_rate": 0.00015765016318311227,
      "loss": 3.5202,
      "step": 102500
    },
    {
      "epoch": 1.4304562183181724,
      "grad_norm": 1.2355769872665405,
      "learning_rate": 0.00015695576696062773,
      "loss": 3.5161,
      "step": 103000
    },
    {
      "epoch": 1.4374001805430179,
      "grad_norm": 1.7655647993087769,
      "learning_rate": 0.00015626137073814316,
      "loss": 3.5094,
      "step": 103500
    },
    {
      "epoch": 1.4443441427678634,
      "grad_norm": 1.4021390676498413,
      "learning_rate": 0.00015556697451565862,
      "loss": 3.5533,
      "step": 104000
    },
    {
      "epoch": 1.4512881049927089,
      "grad_norm": 1.7360609769821167,
      "learning_rate": 0.00015487257829317408,
      "loss": 3.4831,
      "step": 104500
    },
    {
      "epoch": 1.4582320672175544,
      "grad_norm": 1.5841504335403442,
      "learning_rate": 0.00015417818207068954,
      "loss": 3.5008,
      "step": 105000
    },
    {
      "epoch": 1.4651760294423999,
      "grad_norm": 1.7698231935501099,
      "learning_rate": 0.00015348378584820497,
      "loss": 3.4889,
      "step": 105500
    },
    {
      "epoch": 1.4721199916672454,
      "grad_norm": 1.3631160259246826,
      "learning_rate": 0.00015278938962572043,
      "loss": 3.5313,
      "step": 106000
    },
    {
      "epoch": 1.4790639538920909,
      "grad_norm": 1.3617082834243774,
      "learning_rate": 0.0001520949934032359,
      "loss": 3.5399,
      "step": 106500
    },
    {
      "epoch": 1.4860079161169364,
      "grad_norm": 1.367946743965149,
      "learning_rate": 0.00015140059718075132,
      "loss": 3.545,
      "step": 107000
    },
    {
      "epoch": 1.492951878341782,
      "grad_norm": 1.3500925302505493,
      "learning_rate": 0.00015070620095826678,
      "loss": 3.5544,
      "step": 107500
    },
    {
      "epoch": 1.4998958405666274,
      "grad_norm": 3.89847731590271,
      "learning_rate": 0.00015001180473578224,
      "loss": 3.4885,
      "step": 108000
    },
    {
      "epoch": 1.5068398027914727,
      "grad_norm": 2.2299306392669678,
      "learning_rate": 0.00014931740851329767,
      "loss": 3.4893,
      "step": 108500
    },
    {
      "epoch": 1.5137837650163184,
      "grad_norm": 2.350405693054199,
      "learning_rate": 0.00014862301229081313,
      "loss": 3.4992,
      "step": 109000
    },
    {
      "epoch": 1.5207277272411637,
      "grad_norm": 1.3148006200790405,
      "learning_rate": 0.00014792861606832856,
      "loss": 3.5146,
      "step": 109500
    },
    {
      "epoch": 1.5276716894660094,
      "grad_norm": 1.533084750175476,
      "learning_rate": 0.00014723421984584402,
      "loss": 3.5149,
      "step": 110000
    },
    {
      "epoch": 1.5346156516908547,
      "grad_norm": 1.3361338376998901,
      "learning_rate": 0.00014653982362335948,
      "loss": 3.4956,
      "step": 110500
    },
    {
      "epoch": 1.5415596139157004,
      "grad_norm": 1.581416130065918,
      "learning_rate": 0.00014584542740087492,
      "loss": 3.5074,
      "step": 111000
    },
    {
      "epoch": 1.5485035761405457,
      "grad_norm": 1.6259821653366089,
      "learning_rate": 0.00014515103117839037,
      "loss": 3.4961,
      "step": 111500
    },
    {
      "epoch": 1.5554475383653914,
      "grad_norm": 1.981719732284546,
      "learning_rate": 0.00014445663495590583,
      "loss": 3.5192,
      "step": 112000
    },
    {
      "epoch": 1.5623915005902367,
      "grad_norm": 1.2760684490203857,
      "learning_rate": 0.00014376223873342127,
      "loss": 3.5265,
      "step": 112500
    },
    {
      "epoch": 1.5693354628150824,
      "grad_norm": 7.409369468688965,
      "learning_rate": 0.00014306784251093673,
      "loss": 3.5335,
      "step": 113000
    },
    {
      "epoch": 1.5762794250399277,
      "grad_norm": 2.6644575595855713,
      "learning_rate": 0.00014237344628845218,
      "loss": 3.4924,
      "step": 113500
    },
    {
      "epoch": 1.5832233872647734,
      "grad_norm": 1.3111194372177124,
      "learning_rate": 0.00014167905006596764,
      "loss": 3.5339,
      "step": 114000
    },
    {
      "epoch": 1.5901673494896187,
      "grad_norm": 4.329043388366699,
      "learning_rate": 0.00014098465384348308,
      "loss": 3.4945,
      "step": 114500
    },
    {
      "epoch": 1.5971113117144644,
      "grad_norm": 1.5919106006622314,
      "learning_rate": 0.00014029025762099854,
      "loss": 3.5081,
      "step": 115000
    },
    {
      "epoch": 1.6040552739393097,
      "grad_norm": 1.7565652132034302,
      "learning_rate": 0.000139595861398514,
      "loss": 3.5337,
      "step": 115500
    },
    {
      "epoch": 1.6109992361641554,
      "grad_norm": 1.8960776329040527,
      "learning_rate": 0.00013890146517602943,
      "loss": 3.4879,
      "step": 116000
    },
    {
      "epoch": 1.6179431983890007,
      "grad_norm": 1.8651204109191895,
      "learning_rate": 0.0001382070689535449,
      "loss": 3.483,
      "step": 116500
    },
    {
      "epoch": 1.6248871606138464,
      "grad_norm": 2.5513360500335693,
      "learning_rate": 0.00013751267273106035,
      "loss": 3.4763,
      "step": 117000
    },
    {
      "epoch": 1.6318311228386917,
      "grad_norm": 1.5704069137573242,
      "learning_rate": 0.00013681827650857578,
      "loss": 3.4932,
      "step": 117500
    },
    {
      "epoch": 1.6387750850635374,
      "grad_norm": 1.619181513786316,
      "learning_rate": 0.00013612388028609124,
      "loss": 3.5177,
      "step": 118000
    },
    {
      "epoch": 1.6457190472883827,
      "grad_norm": 1.3884799480438232,
      "learning_rate": 0.0001354294840636067,
      "loss": 3.5247,
      "step": 118500
    },
    {
      "epoch": 1.6526630095132282,
      "grad_norm": 1.5763874053955078,
      "learning_rate": 0.00013473508784112213,
      "loss": 3.4879,
      "step": 119000
    },
    {
      "epoch": 1.6596069717380737,
      "grad_norm": 1.2959508895874023,
      "learning_rate": 0.00013404069161863756,
      "loss": 3.4924,
      "step": 119500
    },
    {
      "epoch": 1.6665509339629192,
      "grad_norm": 3.481456756591797,
      "learning_rate": 0.00013334629539615305,
      "loss": 3.4886,
      "step": 120000
    },
    {
      "epoch": 1.6734948961877647,
      "grad_norm": 6.646812438964844,
      "learning_rate": 0.00013265189917366848,
      "loss": 3.5136,
      "step": 120500
    },
    {
      "epoch": 1.6804388584126102,
      "grad_norm": 1.200080394744873,
      "learning_rate": 0.00013195750295118394,
      "loss": 3.5017,
      "step": 121000
    },
    {
      "epoch": 1.6873828206374557,
      "grad_norm": 1.4992713928222656,
      "learning_rate": 0.0001312631067286994,
      "loss": 3.4533,
      "step": 121500
    },
    {
      "epoch": 1.6943267828623012,
      "grad_norm": 2.5522916316986084,
      "learning_rate": 0.00013056871050621483,
      "loss": 3.48,
      "step": 122000
    },
    {
      "epoch": 1.7012707450871467,
      "grad_norm": 1.5243773460388184,
      "learning_rate": 0.0001298743142837303,
      "loss": 3.5058,
      "step": 122500
    },
    {
      "epoch": 1.7082147073119922,
      "grad_norm": 1.4201898574829102,
      "learning_rate": 0.00012917991806124572,
      "loss": 3.4667,
      "step": 123000
    },
    {
      "epoch": 1.7151586695368377,
      "grad_norm": 1.7786469459533691,
      "learning_rate": 0.00012848552183876118,
      "loss": 3.4931,
      "step": 123500
    },
    {
      "epoch": 1.7221026317616832,
      "grad_norm": 3.3978912830352783,
      "learning_rate": 0.00012779112561627664,
      "loss": 3.4855,
      "step": 124000
    },
    {
      "epoch": 1.7290465939865287,
      "grad_norm": 4.56933069229126,
      "learning_rate": 0.00012709672939379207,
      "loss": 3.4691,
      "step": 124500
    },
    {
      "epoch": 1.7359905562113742,
      "grad_norm": 2.483752489089966,
      "learning_rate": 0.00012640233317130753,
      "loss": 3.4761,
      "step": 125000
    },
    {
      "epoch": 1.7429345184362197,
      "grad_norm": 1.5909661054611206,
      "learning_rate": 0.000125707936948823,
      "loss": 3.5052,
      "step": 125500
    },
    {
      "epoch": 1.7498784806610652,
      "grad_norm": 1.670730471611023,
      "learning_rate": 0.00012501354072633842,
      "loss": 3.4758,
      "step": 126000
    },
    {
      "epoch": 1.7568224428859107,
      "grad_norm": 1.2424238920211792,
      "learning_rate": 0.00012431914450385388,
      "loss": 3.4405,
      "step": 126500
    },
    {
      "epoch": 1.7637664051107562,
      "grad_norm": 1.6950280666351318,
      "learning_rate": 0.00012362474828136934,
      "loss": 3.5296,
      "step": 127000
    },
    {
      "epoch": 1.7707103673356017,
      "grad_norm": 2.1729133129119873,
      "learning_rate": 0.00012293035205888477,
      "loss": 3.4733,
      "step": 127500
    },
    {
      "epoch": 1.7776543295604472,
      "grad_norm": 1.6061240434646606,
      "learning_rate": 0.00012223595583640023,
      "loss": 3.4562,
      "step": 128000
    },
    {
      "epoch": 1.7845982917852927,
      "grad_norm": 2.095271587371826,
      "learning_rate": 0.0001215415596139157,
      "loss": 3.4618,
      "step": 128500
    },
    {
      "epoch": 1.7915422540101382,
      "grad_norm": 2.206932306289673,
      "learning_rate": 0.00012084716339143114,
      "loss": 3.4577,
      "step": 129000
    },
    {
      "epoch": 1.7984862162349837,
      "grad_norm": 1.7425895929336548,
      "learning_rate": 0.0001201527671689466,
      "loss": 3.4979,
      "step": 129500
    },
    {
      "epoch": 1.8054301784598292,
      "grad_norm": 2.1199731826782227,
      "learning_rate": 0.00011945837094646204,
      "loss": 3.4951,
      "step": 130000
    },
    {
      "epoch": 1.8123741406846747,
      "grad_norm": 1.4702428579330444,
      "learning_rate": 0.00011876397472397749,
      "loss": 3.4778,
      "step": 130500
    },
    {
      "epoch": 1.81931810290952,
      "grad_norm": 1.5938681364059448,
      "learning_rate": 0.00011806957850149295,
      "loss": 3.4782,
      "step": 131000
    },
    {
      "epoch": 1.8262620651343657,
      "grad_norm": 1.5015869140625,
      "learning_rate": 0.0001173751822790084,
      "loss": 3.4367,
      "step": 131500
    },
    {
      "epoch": 1.833206027359211,
      "grad_norm": 1.8470075130462646,
      "learning_rate": 0.00011668078605652385,
      "loss": 3.4463,
      "step": 132000
    },
    {
      "epoch": 1.8401499895840567,
      "grad_norm": 2.0054242610931396,
      "learning_rate": 0.0001159863898340393,
      "loss": 3.4445,
      "step": 132500
    },
    {
      "epoch": 1.847093951808902,
      "grad_norm": 1.4376716613769531,
      "learning_rate": 0.00011529199361155473,
      "loss": 3.4449,
      "step": 133000
    },
    {
      "epoch": 1.8540379140337477,
      "grad_norm": 2.702432870864868,
      "learning_rate": 0.0001145975973890702,
      "loss": 3.4499,
      "step": 133500
    },
    {
      "epoch": 1.860981876258593,
      "grad_norm": 1.6058772802352905,
      "learning_rate": 0.00011390320116658565,
      "loss": 3.4567,
      "step": 134000
    },
    {
      "epoch": 1.8679258384834387,
      "grad_norm": 3.2056682109832764,
      "learning_rate": 0.00011320880494410108,
      "loss": 3.4845,
      "step": 134500
    },
    {
      "epoch": 1.874869800708284,
      "grad_norm": 1.6341466903686523,
      "learning_rate": 0.00011251440872161656,
      "loss": 3.4699,
      "step": 135000
    },
    {
      "epoch": 1.8818137629331297,
      "grad_norm": 1.4821678400039673,
      "learning_rate": 0.00011182001249913199,
      "loss": 3.453,
      "step": 135500
    },
    {
      "epoch": 1.888757725157975,
      "grad_norm": 2.204435110092163,
      "learning_rate": 0.00011112561627664743,
      "loss": 3.3845,
      "step": 136000
    },
    {
      "epoch": 1.8957016873828207,
      "grad_norm": 2.4527945518493652,
      "learning_rate": 0.0001104312200541629,
      "loss": 3.4595,
      "step": 136500
    },
    {
      "epoch": 1.902645649607666,
      "grad_norm": 1.7573269605636597,
      "learning_rate": 0.00010973682383167834,
      "loss": 3.4554,
      "step": 137000
    },
    {
      "epoch": 1.9095896118325117,
      "grad_norm": 2.0512332916259766,
      "learning_rate": 0.00010904242760919379,
      "loss": 3.4537,
      "step": 137500
    },
    {
      "epoch": 1.916533574057357,
      "grad_norm": 2.056835174560547,
      "learning_rate": 0.00010834803138670924,
      "loss": 3.441,
      "step": 138000
    },
    {
      "epoch": 1.9234775362822027,
      "grad_norm": 4.340169906616211,
      "learning_rate": 0.00010765363516422469,
      "loss": 3.4806,
      "step": 138500
    },
    {
      "epoch": 1.930421498507048,
      "grad_norm": 2.0016748905181885,
      "learning_rate": 0.00010695923894174015,
      "loss": 3.4363,
      "step": 139000
    },
    {
      "epoch": 1.9373654607318938,
      "grad_norm": 1.8290444612503052,
      "learning_rate": 0.0001062648427192556,
      "loss": 3.4428,
      "step": 139500
    },
    {
      "epoch": 1.944309422956739,
      "grad_norm": 1.6090489625930786,
      "learning_rate": 0.00010557044649677104,
      "loss": 3.4514,
      "step": 140000
    },
    {
      "epoch": 1.9512533851815848,
      "grad_norm": 1.5943214893341064,
      "learning_rate": 0.0001048760502742865,
      "loss": 3.425,
      "step": 140500
    },
    {
      "epoch": 1.95819734740643,
      "grad_norm": 1.9066240787506104,
      "learning_rate": 0.00010418165405180195,
      "loss": 3.4637,
      "step": 141000
    },
    {
      "epoch": 1.9651413096312758,
      "grad_norm": 1.718125820159912,
      "learning_rate": 0.00010348725782931739,
      "loss": 3.4784,
      "step": 141500
    },
    {
      "epoch": 1.972085271856121,
      "grad_norm": 1.8587770462036133,
      "learning_rate": 0.00010279286160683285,
      "loss": 3.4058,
      "step": 142000
    },
    {
      "epoch": 1.9790292340809668,
      "grad_norm": 2.709913492202759,
      "learning_rate": 0.0001020984653843483,
      "loss": 3.4442,
      "step": 142500
    },
    {
      "epoch": 1.985973196305812,
      "grad_norm": 1.3006025552749634,
      "learning_rate": 0.00010140406916186374,
      "loss": 3.394,
      "step": 143000
    },
    {
      "epoch": 1.9929171585306575,
      "grad_norm": 1.3316704034805298,
      "learning_rate": 0.0001007096729393792,
      "loss": 3.4307,
      "step": 143500
    },
    {
      "epoch": 1.999861120755503,
      "grad_norm": 2.0763180255889893,
      "learning_rate": 0.00010001527671689465,
      "loss": 3.4408,
      "step": 144000
    },
    {
      "epoch": 2.0,
      "eval_loss": 3.239363193511963,
      "eval_rouge1": 0.055521599130116214,
      "eval_rouge2": 0.013459250920580113,
      "eval_rougeL": 0.054963256550744514,
      "eval_rougeLsum": 0.05513512231622069,
      "eval_runtime": 3954.943,
      "eval_samples_per_second": 4.046,
      "eval_steps_per_second": 2.023,
      "step": 144010
    },
    {
      "epoch": 2.0068050829803488,
      "grad_norm": 2.819636106491089,
      "learning_rate": 9.932088049441011e-05,
      "loss": 3.3738,
      "step": 144500
    },
    {
      "epoch": 2.013749045205194,
      "grad_norm": 1.5194923877716064,
      "learning_rate": 9.862648427192555e-05,
      "loss": 3.3729,
      "step": 145000
    },
    {
      "epoch": 2.0206930074300398,
      "grad_norm": 1.9506241083145142,
      "learning_rate": 9.7932088049441e-05,
      "loss": 3.3605,
      "step": 145500
    },
    {
      "epoch": 2.027636969654885,
      "grad_norm": 2.37030029296875,
      "learning_rate": 9.723769182695646e-05,
      "loss": 3.354,
      "step": 146000
    },
    {
      "epoch": 2.0345809318797308,
      "grad_norm": 1.798161506652832,
      "learning_rate": 9.65432956044719e-05,
      "loss": 3.3393,
      "step": 146500
    },
    {
      "epoch": 2.041524894104576,
      "grad_norm": 1.7773220539093018,
      "learning_rate": 9.584889938198735e-05,
      "loss": 3.3564,
      "step": 147000
    },
    {
      "epoch": 2.0484688563294218,
      "grad_norm": 1.383130669593811,
      "learning_rate": 9.515450315950281e-05,
      "loss": 3.3493,
      "step": 147500
    },
    {
      "epoch": 2.055412818554267,
      "grad_norm": 1.8702272176742554,
      "learning_rate": 9.446010693701825e-05,
      "loss": 3.3706,
      "step": 148000
    },
    {
      "epoch": 2.0623567807791128,
      "grad_norm": 2.419377326965332,
      "learning_rate": 9.37657107145337e-05,
      "loss": 3.369,
      "step": 148500
    },
    {
      "epoch": 2.069300743003958,
      "grad_norm": 1.842032551765442,
      "learning_rate": 9.307131449204916e-05,
      "loss": 3.3367,
      "step": 149000
    },
    {
      "epoch": 2.0762447052288033,
      "grad_norm": 1.890652060508728,
      "learning_rate": 9.23769182695646e-05,
      "loss": 3.3245,
      "step": 149500
    },
    {
      "epoch": 2.083188667453649,
      "grad_norm": 1.6845180988311768,
      "learning_rate": 9.168252204708005e-05,
      "loss": 3.3037,
      "step": 150000
    },
    {
      "epoch": 2.0901326296784943,
      "grad_norm": 1.8656178712844849,
      "learning_rate": 9.098812582459551e-05,
      "loss": 3.3236,
      "step": 150500
    },
    {
      "epoch": 2.09707659190334,
      "grad_norm": 1.933976173400879,
      "learning_rate": 9.029372960211096e-05,
      "loss": 3.3425,
      "step": 151000
    },
    {
      "epoch": 2.1040205541281853,
      "grad_norm": 1.2388135194778442,
      "learning_rate": 8.959933337962642e-05,
      "loss": 3.3654,
      "step": 151500
    },
    {
      "epoch": 2.110964516353031,
      "grad_norm": 2.0735113620758057,
      "learning_rate": 8.890493715714186e-05,
      "loss": 3.329,
      "step": 152000
    },
    {
      "epoch": 2.1179084785778763,
      "grad_norm": 1.460747241973877,
      "learning_rate": 8.821054093465731e-05,
      "loss": 3.3325,
      "step": 152500
    },
    {
      "epoch": 2.124852440802722,
      "grad_norm": 1.381603479385376,
      "learning_rate": 8.751614471217277e-05,
      "loss": 3.3549,
      "step": 153000
    },
    {
      "epoch": 2.1317964030275673,
      "grad_norm": 2.0302138328552246,
      "learning_rate": 8.682174848968821e-05,
      "loss": 3.3099,
      "step": 153500
    },
    {
      "epoch": 2.138740365252413,
      "grad_norm": 1.4594683647155762,
      "learning_rate": 8.612735226720366e-05,
      "loss": 3.3213,
      "step": 154000
    },
    {
      "epoch": 2.1456843274772583,
      "grad_norm": 1.5398012399673462,
      "learning_rate": 8.543295604471912e-05,
      "loss": 3.3918,
      "step": 154500
    },
    {
      "epoch": 2.152628289702104,
      "grad_norm": 1.6090102195739746,
      "learning_rate": 8.473855982223456e-05,
      "loss": 3.3241,
      "step": 155000
    },
    {
      "epoch": 2.1595722519269493,
      "grad_norm": 1.5653716325759888,
      "learning_rate": 8.404416359975001e-05,
      "loss": 3.3673,
      "step": 155500
    },
    {
      "epoch": 2.166516214151795,
      "grad_norm": 1.3755892515182495,
      "learning_rate": 8.334976737726547e-05,
      "loss": 3.3085,
      "step": 156000
    },
    {
      "epoch": 2.1734601763766404,
      "grad_norm": 2.4337687492370605,
      "learning_rate": 8.265537115478091e-05,
      "loss": 3.3441,
      "step": 156500
    },
    {
      "epoch": 2.180404138601486,
      "grad_norm": 1.5850881338119507,
      "learning_rate": 8.196097493229637e-05,
      "loss": 3.3193,
      "step": 157000
    },
    {
      "epoch": 2.1873481008263314,
      "grad_norm": 2.200465679168701,
      "learning_rate": 8.126657870981182e-05,
      "loss": 3.3384,
      "step": 157500
    },
    {
      "epoch": 2.194292063051177,
      "grad_norm": 2.115725517272949,
      "learning_rate": 8.057218248732725e-05,
      "loss": 3.3447,
      "step": 158000
    },
    {
      "epoch": 2.2012360252760224,
      "grad_norm": 1.528279423713684,
      "learning_rate": 7.987778626484272e-05,
      "loss": 3.3155,
      "step": 158500
    },
    {
      "epoch": 2.208179987500868,
      "grad_norm": 1.796518087387085,
      "learning_rate": 7.918339004235816e-05,
      "loss": 3.3501,
      "step": 159000
    },
    {
      "epoch": 2.2151239497257134,
      "grad_norm": 2.301734685897827,
      "learning_rate": 7.84889938198736e-05,
      "loss": 3.3132,
      "step": 159500
    },
    {
      "epoch": 2.222067911950559,
      "grad_norm": 1.7091695070266724,
      "learning_rate": 7.779459759738908e-05,
      "loss": 3.3373,
      "step": 160000
    },
    {
      "epoch": 2.2290118741754044,
      "grad_norm": 1.247341275215149,
      "learning_rate": 7.710020137490451e-05,
      "loss": 3.3235,
      "step": 160500
    },
    {
      "epoch": 2.23595583640025,
      "grad_norm": 3.0286383628845215,
      "learning_rate": 7.640580515241995e-05,
      "loss": 3.3162,
      "step": 161000
    },
    {
      "epoch": 2.2428997986250954,
      "grad_norm": 1.669455885887146,
      "learning_rate": 7.571140892993541e-05,
      "loss": 3.2822,
      "step": 161500
    },
    {
      "epoch": 2.249843760849941,
      "grad_norm": 1.5718942880630493,
      "learning_rate": 7.501701270745086e-05,
      "loss": 3.3213,
      "step": 162000
    },
    {
      "epoch": 2.2567877230747864,
      "grad_norm": 1.445328950881958,
      "learning_rate": 7.432261648496632e-05,
      "loss": 3.3117,
      "step": 162500
    },
    {
      "epoch": 2.263731685299632,
      "grad_norm": 2.205052614212036,
      "learning_rate": 7.362822026248176e-05,
      "loss": 3.3613,
      "step": 163000
    },
    {
      "epoch": 2.2706756475244774,
      "grad_norm": 2.702474594116211,
      "learning_rate": 7.293382403999721e-05,
      "loss": 3.3368,
      "step": 163500
    },
    {
      "epoch": 2.277619609749323,
      "grad_norm": 2.0145928859710693,
      "learning_rate": 7.223942781751267e-05,
      "loss": 3.3162,
      "step": 164000
    },
    {
      "epoch": 2.2845635719741684,
      "grad_norm": 1.5684378147125244,
      "learning_rate": 7.154503159502811e-05,
      "loss": 3.3532,
      "step": 164500
    },
    {
      "epoch": 2.291507534199014,
      "grad_norm": 1.6014593839645386,
      "learning_rate": 7.085063537254357e-05,
      "loss": 3.3014,
      "step": 165000
    },
    {
      "epoch": 2.2984514964238594,
      "grad_norm": 1.8588491678237915,
      "learning_rate": 7.015623915005902e-05,
      "loss": 3.353,
      "step": 165500
    },
    {
      "epoch": 2.305395458648705,
| "grad_norm": 1.7128487825393677, | |
| "learning_rate": 6.946184292757447e-05, | |
| "loss": 3.31, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 2.3123394208735504, | |
| "grad_norm": 1.5588475465774536, | |
| "learning_rate": 6.876744670508992e-05, | |
| "loss": 3.3193, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 2.319283383098396, | |
| "grad_norm": 1.8460384607315063, | |
| "learning_rate": 6.807305048260537e-05, | |
| "loss": 3.3158, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 2.3262273453232414, | |
| "grad_norm": 1.634889006614685, | |
| "learning_rate": 6.737865426012082e-05, | |
| "loss": 3.3089, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 2.333171307548087, | |
| "grad_norm": 2.4914541244506836, | |
| "learning_rate": 6.668425803763628e-05, | |
| "loss": 3.328, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 2.3401152697729324, | |
| "grad_norm": 1.2565484046936035, | |
| "learning_rate": 6.598986181515172e-05, | |
| "loss": 3.2968, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 2.347059231997778, | |
| "grad_norm": 2.460926055908203, | |
| "learning_rate": 6.529546559266717e-05, | |
| "loss": 3.353, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 2.3540031942226234, | |
| "grad_norm": 2.0668790340423584, | |
| "learning_rate": 6.460106937018263e-05, | |
| "loss": 3.3118, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 2.360947156447469, | |
| "grad_norm": 2.0417802333831787, | |
| "learning_rate": 6.390667314769807e-05, | |
| "loss": 3.3098, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 2.3678911186723144, | |
| "grad_norm": 4.113938808441162, | |
| "learning_rate": 6.321227692521352e-05, | |
| "loss": 3.3298, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 2.37483508089716, | |
| "grad_norm": 1.9370335340499878, | |
| "learning_rate": 6.251788070272896e-05, | |
| "loss": 3.3009, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 2.3817790431220054, | |
| "grad_norm": 2.2328431606292725, | |
| "learning_rate": 6.182348448024442e-05, | |
| "loss": 3.3067, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 2.3887230053468507, | |
| "grad_norm": 1.4481734037399292, | |
| "learning_rate": 6.112908825775988e-05, | |
| "loss": 3.3408, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 2.3956669675716964, | |
| "grad_norm": 1.1176230907440186, | |
| "learning_rate": 6.043469203527532e-05, | |
| "loss": 3.3586, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 2.402610929796542, | |
| "grad_norm": 2.566291332244873, | |
| "learning_rate": 5.9740295812790774e-05, | |
| "loss": 3.2948, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 2.4095548920213874, | |
| "grad_norm": 1.9052931070327759, | |
| "learning_rate": 5.9045899590306226e-05, | |
| "loss": 3.3025, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 2.4164988542462327, | |
| "grad_norm": 1.9683756828308105, | |
| "learning_rate": 5.835150336782167e-05, | |
| "loss": 3.3348, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 2.4234428164710784, | |
| "grad_norm": 3.174572706222534, | |
| "learning_rate": 5.7657107145337125e-05, | |
| "loss": 3.329, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 2.430386778695924, | |
| "grad_norm": 2.3149008750915527, | |
| "learning_rate": 5.696271092285258e-05, | |
| "loss": 3.2889, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 2.4373307409207694, | |
| "grad_norm": 1.6363497972488403, | |
| "learning_rate": 5.626831470036803e-05, | |
| "loss": 3.3258, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 2.4442747031456147, | |
| "grad_norm": 1.2083947658538818, | |
| "learning_rate": 5.5573918477883476e-05, | |
| "loss": 3.3456, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 2.4512186653704604, | |
| "grad_norm": 2.0313804149627686, | |
| "learning_rate": 5.487952225539893e-05, | |
| "loss": 3.3154, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 2.458162627595306, | |
| "grad_norm": 1.4387134313583374, | |
| "learning_rate": 5.418512603291438e-05, | |
| "loss": 3.303, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 2.4651065898201514, | |
| "grad_norm": 1.9549680948257446, | |
| "learning_rate": 5.349072981042983e-05, | |
| "loss": 3.309, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 2.4720505520449967, | |
| "grad_norm": 1.7797924280166626, | |
| "learning_rate": 5.279633358794527e-05, | |
| "loss": 3.3338, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 2.4789945142698424, | |
| "grad_norm": 1.5036529302597046, | |
| "learning_rate": 5.2101937365460725e-05, | |
| "loss": 3.3122, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 2.4859384764946877, | |
| "grad_norm": 2.212462902069092, | |
| "learning_rate": 5.140754114297618e-05, | |
| "loss": 3.2841, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 2.4928824387195334, | |
| "grad_norm": 3.3562114238739014, | |
| "learning_rate": 5.071314492049162e-05, | |
| "loss": 3.3017, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 2.4998264009443787, | |
| "grad_norm": 1.547029733657837, | |
| "learning_rate": 5.0018748698007076e-05, | |
| "loss": 3.3182, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 2.5067703631692244, | |
| "grad_norm": 1.6302788257598877, | |
| "learning_rate": 4.932435247552253e-05, | |
| "loss": 3.2912, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 2.5137143253940697, | |
| "grad_norm": 2.3086509704589844, | |
| "learning_rate": 4.862995625303798e-05, | |
| "loss": 3.2617, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 2.5206582876189154, | |
| "grad_norm": 1.8361918926239014, | |
| "learning_rate": 4.7935560030553427e-05, | |
| "loss": 3.3025, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 2.5276022498437607, | |
| "grad_norm": 2.591750144958496, | |
| "learning_rate": 4.724116380806888e-05, | |
| "loss": 3.3148, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 2.5345462120686064, | |
| "grad_norm": 1.2800657749176025, | |
| "learning_rate": 4.654676758558433e-05, | |
| "loss": 3.3176, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 2.5414901742934517, | |
| "grad_norm": 1.7381142377853394, | |
| "learning_rate": 4.585237136309978e-05, | |
| "loss": 3.2937, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 2.5484341365182974, | |
| "grad_norm": 1.6898601055145264, | |
| "learning_rate": 4.515797514061523e-05, | |
| "loss": 3.2955, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 2.5553780987431427, | |
| "grad_norm": 1.9518952369689941, | |
| "learning_rate": 4.446357891813068e-05, | |
| "loss": 3.3093, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 2.5623220609679884, | |
| "grad_norm": 1.4132803678512573, | |
| "learning_rate": 4.3769182695646135e-05, | |
| "loss": 3.3142, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 2.5692660231928337, | |
| "grad_norm": 1.304002046585083, | |
| "learning_rate": 4.307478647316158e-05, | |
| "loss": 3.2875, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 2.5762099854176794, | |
| "grad_norm": 3.1700334548950195, | |
| "learning_rate": 4.2380390250677033e-05, | |
| "loss": 3.3445, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 2.5831539476425247, | |
| "grad_norm": 1.5128083229064941, | |
| "learning_rate": 4.1685994028192486e-05, | |
| "loss": 3.3174, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 2.5900979098673704, | |
| "grad_norm": 2.8518435955047607, | |
| "learning_rate": 4.099159780570794e-05, | |
| "loss": 3.2766, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 2.5970418720922157, | |
| "grad_norm": 1.2636958360671997, | |
| "learning_rate": 4.0297201583223384e-05, | |
| "loss": 3.3032, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 2.6039858343170614, | |
| "grad_norm": 2.1779420375823975, | |
| "learning_rate": 3.960280536073884e-05, | |
| "loss": 3.3202, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 2.6109297965419067, | |
| "grad_norm": 1.8627387285232544, | |
| "learning_rate": 3.890840913825429e-05, | |
| "loss": 3.2831, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 2.6178737587667524, | |
| "grad_norm": 2.000037670135498, | |
| "learning_rate": 3.821401291576973e-05, | |
| "loss": 3.3431, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 2.6248177209915977, | |
| "grad_norm": 1.3186124563217163, | |
| "learning_rate": 3.751961669328519e-05, | |
| "loss": 3.3161, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 2.6317616832164434, | |
| "grad_norm": 1.7129555940628052, | |
| "learning_rate": 3.682522047080064e-05, | |
| "loss": 3.2901, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 2.6387056454412887, | |
| "grad_norm": 1.727120041847229, | |
| "learning_rate": 3.6130824248316086e-05, | |
| "loss": 3.3028, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 2.6456496076661344, | |
| "grad_norm": 2.223973512649536, | |
| "learning_rate": 3.543642802583154e-05, | |
| "loss": 3.3096, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 2.6525935698909797, | |
| "grad_norm": 1.908118486404419, | |
| "learning_rate": 3.4742031803346984e-05, | |
| "loss": 3.2971, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 2.6595375321158254, | |
| "grad_norm": 1.4966055154800415, | |
| "learning_rate": 3.404763558086244e-05, | |
| "loss": 3.3041, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 2.6664814943406707, | |
| "grad_norm": 1.5852959156036377, | |
| "learning_rate": 3.335323935837789e-05, | |
| "loss": 3.3396, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 2.6734254565655164, | |
| "grad_norm": 1.956778883934021, | |
| "learning_rate": 3.2658843135893335e-05, | |
| "loss": 3.3153, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 2.6803694187903617, | |
| "grad_norm": 1.6665066480636597, | |
| "learning_rate": 3.196444691340879e-05, | |
| "loss": 3.3051, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 2.6873133810152074, | |
| "grad_norm": 1.5020607709884644, | |
| "learning_rate": 3.127005069092424e-05, | |
| "loss": 3.2956, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 2.6942573432400527, | |
| "grad_norm": 1.861676573753357, | |
| "learning_rate": 3.057565446843969e-05, | |
| "loss": 3.3266, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 2.701201305464898, | |
| "grad_norm": 1.6980831623077393, | |
| "learning_rate": 2.988125824595514e-05, | |
| "loss": 3.2896, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 2.7081452676897437, | |
| "grad_norm": 3.289989709854126, | |
| "learning_rate": 2.9186862023470588e-05, | |
| "loss": 3.2898, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 2.7150892299145895, | |
| "grad_norm": 1.6162209510803223, | |
| "learning_rate": 2.849246580098604e-05, | |
| "loss": 3.2698, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 2.7220331921394347, | |
| "grad_norm": 4.295835018157959, | |
| "learning_rate": 2.779806957850149e-05, | |
| "loss": 3.2747, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 2.72897715436428, | |
| "grad_norm": 2.021383762359619, | |
| "learning_rate": 2.7103673356016942e-05, | |
| "loss": 3.2866, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 2.7359211165891257, | |
| "grad_norm": 5.153833389282227, | |
| "learning_rate": 2.640927713353239e-05, | |
| "loss": 3.3057, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 2.7428650788139715, | |
| "grad_norm": 1.4507516622543335, | |
| "learning_rate": 2.5714880911047844e-05, | |
| "loss": 3.2617, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 2.7498090410388167, | |
| "grad_norm": 1.0930436849594116, | |
| "learning_rate": 2.5020484688563293e-05, | |
| "loss": 3.3113, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 2.756753003263662, | |
| "grad_norm": 2.5461559295654297, | |
| "learning_rate": 2.4326088466078745e-05, | |
| "loss": 3.3073, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 2.7636969654885077, | |
| "grad_norm": 1.3845510482788086, | |
| "learning_rate": 2.363169224359419e-05, | |
| "loss": 3.2805, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 2.7706409277133535, | |
| "grad_norm": 1.5489321947097778, | |
| "learning_rate": 2.293729602110964e-05, | |
| "loss": 3.2863, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 2.7775848899381987, | |
| "grad_norm": 1.2201488018035889, | |
| "learning_rate": 2.2242899798625093e-05, | |
| "loss": 3.2959, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 2.784528852163044, | |
| "grad_norm": 1.8551557064056396, | |
| "learning_rate": 2.1548503576140542e-05, | |
| "loss": 3.3299, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 2.7914728143878897, | |
| "grad_norm": 2.28908371925354, | |
| "learning_rate": 2.0854107353655995e-05, | |
| "loss": 3.2962, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 2.7984167766127355, | |
| "grad_norm": 2.0773096084594727, | |
| "learning_rate": 2.0159711131171444e-05, | |
| "loss": 3.2798, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 2.8053607388375807, | |
| "grad_norm": 2.611323833465576, | |
| "learning_rate": 1.9465314908686896e-05, | |
| "loss": 3.3228, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 2.812304701062426, | |
| "grad_norm": 2.0584192276000977, | |
| "learning_rate": 1.8770918686202346e-05, | |
| "loss": 3.3112, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 2.8192486632872718, | |
| "grad_norm": 3.291172981262207, | |
| "learning_rate": 1.8076522463717795e-05, | |
| "loss": 3.3141, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 2.8261926255121175, | |
| "grad_norm": 2.5255441665649414, | |
| "learning_rate": 1.7382126241233247e-05, | |
| "loss": 3.2951, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 2.8331365877369628, | |
| "grad_norm": 1.3763819932937622, | |
| "learning_rate": 1.6687730018748696e-05, | |
| "loss": 3.294, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 2.840080549961808, | |
| "grad_norm": 1.9026843309402466, | |
| "learning_rate": 1.599333379626415e-05, | |
| "loss": 3.3003, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 2.8470245121866538, | |
| "grad_norm": 1.6121410131454468, | |
| "learning_rate": 1.5298937573779598e-05, | |
| "loss": 3.3096, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 2.8539684744114995, | |
| "grad_norm": 2.3993430137634277, | |
| "learning_rate": 1.4604541351295047e-05, | |
| "loss": 3.3075, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 2.8609124366363448, | |
| "grad_norm": 1.6766456365585327, | |
| "learning_rate": 1.3910145128810498e-05, | |
| "loss": 3.2873, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 2.86785639886119, | |
| "grad_norm": 2.1749913692474365, | |
| "learning_rate": 1.3215748906325947e-05, | |
| "loss": 3.2432, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 2.8748003610860358, | |
| "grad_norm": 1.5734447240829468, | |
| "learning_rate": 1.2521352683841398e-05, | |
| "loss": 3.2501, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 2.881744323310881, | |
| "grad_norm": 1.3672767877578735, | |
| "learning_rate": 1.1826956461356849e-05, | |
| "loss": 3.292, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 2.8886882855357268, | |
| "grad_norm": 3.4438602924346924, | |
| "learning_rate": 1.11325602388723e-05, | |
| "loss": 3.2755, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 2.895632247760572, | |
| "grad_norm": 3.021101474761963, | |
| "learning_rate": 1.043816401638775e-05, | |
| "loss": 3.2599, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 2.9025762099854178, | |
| "grad_norm": 1.2644829750061035, | |
| "learning_rate": 9.743767793903202e-06, | |
| "loss": 3.291, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 2.909520172210263, | |
| "grad_norm": 1.7406469583511353, | |
| "learning_rate": 9.04937157141865e-06, | |
| "loss": 3.2871, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 2.9164641344351088, | |
| "grad_norm": 1.8715460300445557, | |
| "learning_rate": 8.354975348934102e-06, | |
| "loss": 3.3359, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 2.923408096659954, | |
| "grad_norm": 1.7464805841445923, | |
| "learning_rate": 7.660579126449552e-06, | |
| "loss": 3.263, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 2.9303520588847998, | |
| "grad_norm": 1.6525601148605347, | |
| "learning_rate": 6.966182903965002e-06, | |
| "loss": 3.2654, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 2.937296021109645, | |
| "grad_norm": 2.9503705501556396, | |
| "learning_rate": 6.2717866814804524e-06, | |
| "loss": 3.2797, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 2.9442399833344908, | |
| "grad_norm": 1.702645182609558, | |
| "learning_rate": 5.5773904589959024e-06, | |
| "loss": 3.291, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 2.951183945559336, | |
| "grad_norm": 1.7340339422225952, | |
| "learning_rate": 4.882994236511353e-06, | |
| "loss": 3.3051, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 2.9581279077841818, | |
| "grad_norm": 1.9832649230957031, | |
| "learning_rate": 4.188598014026803e-06, | |
| "loss": 3.2941, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 2.965071870009027, | |
| "grad_norm": 2.080734968185425, | |
| "learning_rate": 3.4942017915422537e-06, | |
| "loss": 3.3281, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 2.972015832233873, | |
| "grad_norm": 1.7874020338058472, | |
| "learning_rate": 2.799805569057704e-06, | |
| "loss": 3.2567, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 2.978959794458718, | |
| "grad_norm": 1.32713782787323, | |
| "learning_rate": 2.1054093465731546e-06, | |
| "loss": 3.3156, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 2.985903756683564, | |
| "grad_norm": 2.1903092861175537, | |
| "learning_rate": 1.4110131240886048e-06, | |
| "loss": 3.2949, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 2.992847718908409, | |
| "grad_norm": 1.8510949611663818, | |
| "learning_rate": 7.166169016040551e-07, | |
| "loss": 3.3084, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 2.999791681133255, | |
| "grad_norm": 1.916053056716919, | |
| "learning_rate": 2.222067911950559e-08, | |
| "loss": 3.2611, | |
| "step": 216000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 216015, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3301724526811136e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
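
The file above is the `trainer_state.json` that Hugging Face `Trainer` writes into each checkpoint directory. As a minimal sketch of how to inspect it, assuming the JSON is saved locally under that name (the path and the printed summary fields are illustrative assumptions, not part of the file), the following Python uses only the standard library to load the log history and summarize the run:

```python
import json

# Minimal sketch: load the trainer state shown above.
# The filename is an assumption for illustration; Trainer saves this
# file as trainer_state.json inside each checkpoint directory.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
print(f"logged entries : {len(history)}")
print(f"max steps      : {state['max_steps']}")
print(f"train epochs   : {state['num_train_epochs']}")

# Final logged record: step, loss, and learning rate.
last = history[-1]
print(f"final step {last['step']}: loss={last['loss']}, lr={last['learning_rate']:.3e}")

# Simple trend check: mean loss over the first and last ten log entries.
# Every record in this run logs a loss, so no filtering is needed.
first_avg = sum(r["loss"] for r in history[:10]) / 10
last_avg = sum(r["loss"] for r in history[-10:]) / 10
print(f"mean loss, first 10 logs: {first_avg:.4f}")
print(f"mean loss, last 10 logs : {last_avg:.4f}")
```

Run against this state, the final-record line should report step 216000 with a loss of 3.2611 and a learning rate of about 2.222e-08, consistent with the near-complete linear decay of the schedule at the end of epoch 3.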