{ "best_global_step": 8688, "best_metric": 3.460914399511239e-08, "best_model_checkpoint": "./results/qwen_2.5_3b/dora/checkpoint-8688", "epoch": 3.0, "eval_steps": 500, "global_step": 8688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003453038674033149, "grad_norm": 0.790235698223114, "learning_rate": 0.000299689226519337, "loss": 1.9751, "step": 10 }, { "epoch": 0.006906077348066298, "grad_norm": 0.12320765107870102, "learning_rate": 0.0002993439226519337, "loss": 0.1637, "step": 20 }, { "epoch": 0.010359116022099447, "grad_norm": 0.005471355281770229, "learning_rate": 0.00029899861878453037, "loss": 0.0006, "step": 30 }, { "epoch": 0.013812154696132596, "grad_norm": 0.002303406596183777, "learning_rate": 0.00029865331491712706, "loss": 0.0001, "step": 40 }, { "epoch": 0.017265193370165747, "grad_norm": 0.0015583776403218508, "learning_rate": 0.0002983080110497237, "loss": 0.0, "step": 50 }, { "epoch": 0.020718232044198894, "grad_norm": 0.0010585383279249072, "learning_rate": 0.00029796270718232044, "loss": 0.0, "step": 60 }, { "epoch": 0.024171270718232045, "grad_norm": 0.0006631984142586589, "learning_rate": 0.00029761740331491713, "loss": 0.0, "step": 70 }, { "epoch": 0.027624309392265192, "grad_norm": 0.00044978229561820626, "learning_rate": 0.00029727209944751376, "loss": 0.0, "step": 80 }, { "epoch": 0.031077348066298343, "grad_norm": 0.0003509189991746098, "learning_rate": 0.00029692679558011045, "loss": 0.0, "step": 90 }, { "epoch": 0.034530386740331494, "grad_norm": 0.00028745076269842684, "learning_rate": 0.00029658149171270714, "loss": 0.0, "step": 100 }, { "epoch": 0.03798342541436464, "grad_norm": 0.00025074262521229684, "learning_rate": 0.00029623618784530383, "loss": 0.0, "step": 110 }, { "epoch": 0.04143646408839779, "grad_norm": 0.00022001242905389518, "learning_rate": 0.0002958908839779005, "loss": 0.0, "step": 120 }, { "epoch": 0.04488950276243094, "grad_norm": 0.0001964504917850718, "learning_rate": 0.0002955455801104972, "loss": 0.0, "step": 130 }, { "epoch": 0.04834254143646409, "grad_norm": 0.00017869958537630737, "learning_rate": 0.0002952002762430939, "loss": 0.0, "step": 140 }, { "epoch": 0.05179558011049724, "grad_norm": 0.00016258594405371696, "learning_rate": 0.0002948549723756906, "loss": 0.0, "step": 150 }, { "epoch": 0.055248618784530384, "grad_norm": 0.00015105163038242608, "learning_rate": 0.0002945096685082873, "loss": 0.0, "step": 160 }, { "epoch": 0.05870165745856354, "grad_norm": 0.0001383009657729417, "learning_rate": 0.00029416436464088397, "loss": 0.0, "step": 170 }, { "epoch": 0.062154696132596686, "grad_norm": 0.00012864168093074113, "learning_rate": 0.0002938190607734806, "loss": 0.0, "step": 180 }, { "epoch": 0.06560773480662983, "grad_norm": 0.00012088521179975942, "learning_rate": 0.00029347375690607735, "loss": 0.0, "step": 190 }, { "epoch": 0.06906077348066299, "grad_norm": 0.00011210545198991895, "learning_rate": 0.000293128453038674, "loss": 0.0, "step": 200 }, { "epoch": 0.07251381215469613, "grad_norm": 0.0001059524220181629, "learning_rate": 0.0002927831491712707, "loss": 0.0, "step": 210 }, { "epoch": 0.07596685082872928, "grad_norm": 9.968437370844185e-05, "learning_rate": 0.00029243784530386737, "loss": 0.0, "step": 220 }, { "epoch": 0.07941988950276244, "grad_norm": 9.436205436941236e-05, "learning_rate": 0.00029209254143646406, "loss": 0.0, "step": 230 }, { "epoch": 0.08287292817679558, "grad_norm": 8.917079685488716e-05, "learning_rate": 0.00029174723756906075, "loss": 0.0, "step": 240 }, { "epoch": 0.08632596685082873, "grad_norm": 8.480295218760148e-05, "learning_rate": 0.00029140193370165744, "loss": 0.0, "step": 250 }, { "epoch": 0.08977900552486189, "grad_norm": 8.014165359782055e-05, "learning_rate": 0.00029105662983425413, "loss": 0.0, "step": 260 }, { "epoch": 0.09323204419889503, "grad_norm": 7.662668213015422e-05, "learning_rate": 0.0002907113259668508, "loss": 0.0, "step": 270 }, { "epoch": 0.09668508287292818, "grad_norm": 7.3047231126111e-05, "learning_rate": 0.0002903660220994475, "loss": 0.0, "step": 280 }, { "epoch": 0.10013812154696132, "grad_norm": 6.930085510248318e-05, "learning_rate": 0.0002900207182320442, "loss": 0.0, "step": 290 }, { "epoch": 0.10359116022099447, "grad_norm": 6.680389924440533e-05, "learning_rate": 0.00028967541436464083, "loss": 0.0, "step": 300 }, { "epoch": 0.10704419889502763, "grad_norm": 6.42253871774301e-05, "learning_rate": 0.0002893301104972375, "loss": 0.0, "step": 310 }, { "epoch": 0.11049723756906077, "grad_norm": 6.105229840613902e-05, "learning_rate": 0.00028898480662983427, "loss": 0.0, "step": 320 }, { "epoch": 0.11395027624309392, "grad_norm": 5.882488039787859e-05, "learning_rate": 0.0002886395027624309, "loss": 0.0, "step": 330 }, { "epoch": 0.11740331491712708, "grad_norm": 5.6571421737316996e-05, "learning_rate": 0.0002882941988950276, "loss": 0.0, "step": 340 }, { "epoch": 0.12085635359116022, "grad_norm": 5.461975888465531e-05, "learning_rate": 0.0002879488950276243, "loss": 0.0, "step": 350 }, { "epoch": 0.12430939226519337, "grad_norm": 5.222026084084064e-05, "learning_rate": 0.00028760359116022097, "loss": 0.0, "step": 360 }, { "epoch": 0.1277624309392265, "grad_norm": 5.0601196562638506e-05, "learning_rate": 0.00028725828729281766, "loss": 0.0, "step": 370 }, { "epoch": 0.13121546961325967, "grad_norm": 4.846305455430411e-05, "learning_rate": 0.00028691298342541435, "loss": 0.0, "step": 380 }, { "epoch": 0.13466850828729282, "grad_norm": 4.7008568799356e-05, "learning_rate": 0.00028656767955801104, "loss": 0.0, "step": 390 }, { "epoch": 0.13812154696132597, "grad_norm": 4.55831759609282e-05, "learning_rate": 0.0002862223756906077, "loss": 0.0, "step": 400 }, { "epoch": 0.14157458563535913, "grad_norm": 4.39039031334687e-05, "learning_rate": 0.0002858770718232044, "loss": 0.0, "step": 410 }, { "epoch": 0.14502762430939226, "grad_norm": 4.2708965338533744e-05, "learning_rate": 0.0002855317679558011, "loss": 0.0, "step": 420 }, { "epoch": 0.1484806629834254, "grad_norm": 4.123576218262315e-05, "learning_rate": 0.00028518646408839775, "loss": 0.0, "step": 430 }, { "epoch": 0.15193370165745856, "grad_norm": 3.9969352656044066e-05, "learning_rate": 0.00028484116022099444, "loss": 0.0, "step": 440 }, { "epoch": 0.15538674033149172, "grad_norm": 3.8744772609788924e-05, "learning_rate": 0.0002844958563535911, "loss": 0.0, "step": 450 }, { "epoch": 0.15883977900552487, "grad_norm": 3.7482088373508304e-05, "learning_rate": 0.0002841505524861878, "loss": 0.0, "step": 460 }, { "epoch": 0.162292817679558, "grad_norm": 3.6892488424200565e-05, "learning_rate": 0.0002838052486187845, "loss": 0.0, "step": 470 }, { "epoch": 0.16574585635359115, "grad_norm": 3.5695826227311045e-05, "learning_rate": 0.0002834599447513812, "loss": 0.0, "step": 480 }, { "epoch": 0.1691988950276243, "grad_norm": 3.455656042206101e-05, "learning_rate": 0.0002831146408839779, "loss": 0.0, "step": 490 }, { "epoch": 0.17265193370165746, "grad_norm": 3.347896199556999e-05, "learning_rate": 0.0002827693370165745, "loss": 0.0, "step": 500 }, { "epoch": 0.17610497237569062, "grad_norm": 3.296266368124634e-05, "learning_rate": 0.00028242403314917127, "loss": 0.0, "step": 510 }, { "epoch": 0.17955801104972377, "grad_norm": 3.2005242246668786e-05, "learning_rate": 0.00028207872928176796, "loss": 0.0, "step": 520 }, { "epoch": 0.1830110497237569, "grad_norm": 3.1267038139048964e-05, "learning_rate": 0.0002817334254143646, "loss": 0.0, "step": 530 }, { "epoch": 0.18646408839779005, "grad_norm": 3.0512275770888664e-05, "learning_rate": 0.00028138812154696134, "loss": 0.0, "step": 540 }, { "epoch": 0.1899171270718232, "grad_norm": 2.996620605699718e-05, "learning_rate": 0.00028104281767955797, "loss": 0.0, "step": 550 }, { "epoch": 0.19337016574585636, "grad_norm": 2.9056487619527616e-05, "learning_rate": 0.00028069751381215466, "loss": 0.0, "step": 560 }, { "epoch": 0.1968232044198895, "grad_norm": 2.8300528356339782e-05, "learning_rate": 0.00028035220994475135, "loss": 0.0, "step": 570 }, { "epoch": 0.20027624309392264, "grad_norm": 2.7861147827934474e-05, "learning_rate": 0.00028000690607734804, "loss": 0.0, "step": 580 }, { "epoch": 0.2037292817679558, "grad_norm": 2.7006239179172553e-05, "learning_rate": 0.00027966160220994473, "loss": 0.0, "step": 590 }, { "epoch": 0.20718232044198895, "grad_norm": 2.6430538127897307e-05, "learning_rate": 0.0002793162983425414, "loss": 0.0, "step": 600 }, { "epoch": 0.2106353591160221, "grad_norm": 2.5890338292811066e-05, "learning_rate": 0.0002789709944751381, "loss": 0.0, "step": 610 }, { "epoch": 0.21408839779005526, "grad_norm": 2.5378456484759226e-05, "learning_rate": 0.0002786256906077348, "loss": 0.0, "step": 620 }, { "epoch": 0.2175414364640884, "grad_norm": 2.4685317839612253e-05, "learning_rate": 0.00027828038674033144, "loss": 0.0, "step": 630 }, { "epoch": 0.22099447513812154, "grad_norm": 2.4229449991253205e-05, "learning_rate": 0.0002779350828729282, "loss": 0.0, "step": 640 }, { "epoch": 0.2244475138121547, "grad_norm": 2.3789914848748595e-05, "learning_rate": 0.0002775897790055248, "loss": 0.0, "step": 650 }, { "epoch": 0.22790055248618785, "grad_norm": 2.311296520929318e-05, "learning_rate": 0.0002772444751381215, "loss": 0.0, "step": 660 }, { "epoch": 0.231353591160221, "grad_norm": 2.2746791728422977e-05, "learning_rate": 0.00027689917127071825, "loss": 0.0, "step": 670 }, { "epoch": 0.23480662983425415, "grad_norm": 2.236058935523033e-05, "learning_rate": 0.0002765538674033149, "loss": 0.0, "step": 680 }, { "epoch": 0.23825966850828728, "grad_norm": 2.1688076230930164e-05, "learning_rate": 0.0002762085635359116, "loss": 0.0, "step": 690 }, { "epoch": 0.24171270718232044, "grad_norm": 2.1283045498421416e-05, "learning_rate": 0.00027586325966850826, "loss": 0.0, "step": 700 }, { "epoch": 0.2451657458563536, "grad_norm": 2.0801953724003397e-05, "learning_rate": 0.00027551795580110495, "loss": 0.0, "step": 710 }, { "epoch": 0.24861878453038674, "grad_norm": 2.0418101485120133e-05, "learning_rate": 0.00027517265193370164, "loss": 0.0, "step": 720 }, { "epoch": 0.2520718232044199, "grad_norm": 1.9899263861589134e-05, "learning_rate": 0.00027482734806629833, "loss": 0.0, "step": 730 }, { "epoch": 0.255524861878453, "grad_norm": 1.9446213627816178e-05, "learning_rate": 0.000274482044198895, "loss": 0.0, "step": 740 }, { "epoch": 0.2589779005524862, "grad_norm": 1.905839963001199e-05, "learning_rate": 0.00027413674033149166, "loss": 0.0, "step": 750 }, { "epoch": 0.26243093922651933, "grad_norm": 1.8560838725534268e-05, "learning_rate": 0.00027379143646408835, "loss": 0.0, "step": 760 }, { "epoch": 0.26588397790055246, "grad_norm": 1.839939250203315e-05, "learning_rate": 0.0002734461325966851, "loss": 0.0, "step": 770 }, { "epoch": 0.26933701657458564, "grad_norm": 1.8152511984226294e-05, "learning_rate": 0.00027310082872928173, "loss": 0.0, "step": 780 }, { "epoch": 0.27279005524861877, "grad_norm": 1.7737207599566318e-05, "learning_rate": 0.0002727555248618784, "loss": 0.0, "step": 790 }, { "epoch": 0.27624309392265195, "grad_norm": 1.741879532346502e-05, "learning_rate": 0.0002724102209944751, "loss": 0.0, "step": 800 }, { "epoch": 0.2796961325966851, "grad_norm": 1.704100941424258e-05, "learning_rate": 0.0002720649171270718, "loss": 0.0, "step": 810 }, { "epoch": 0.28314917127071826, "grad_norm": 1.6744712411309592e-05, "learning_rate": 0.0002717196132596685, "loss": 0.0, "step": 820 }, { "epoch": 0.2866022099447514, "grad_norm": 1.6424670320702717e-05, "learning_rate": 0.0002713743093922652, "loss": 0.0, "step": 830 }, { "epoch": 0.2900552486187845, "grad_norm": 1.6274163499474525e-05, "learning_rate": 0.00027102900552486187, "loss": 0.0, "step": 840 }, { "epoch": 0.2935082872928177, "grad_norm": 1.5978015653672628e-05, "learning_rate": 0.0002706837016574585, "loss": 0.0, "step": 850 }, { "epoch": 0.2969613259668508, "grad_norm": 1.5768708180985413e-05, "learning_rate": 0.00027033839779005525, "loss": 0.0, "step": 860 }, { "epoch": 0.300414364640884, "grad_norm": 1.539581899123732e-05, "learning_rate": 0.00026999309392265194, "loss": 0.0, "step": 870 }, { "epoch": 0.30386740331491713, "grad_norm": 1.5208648619591258e-05, "learning_rate": 0.0002696477900552486, "loss": 0.0, "step": 880 }, { "epoch": 0.30732044198895025, "grad_norm": 1.501227689004736e-05, "learning_rate": 0.00026930248618784526, "loss": 0.0, "step": 890 }, { "epoch": 0.31077348066298344, "grad_norm": 1.4740814549440984e-05, "learning_rate": 0.00026895718232044195, "loss": 0.0, "step": 900 }, { "epoch": 0.31422651933701656, "grad_norm": 1.4488658962363843e-05, "learning_rate": 0.00026861187845303864, "loss": 0.0, "step": 910 }, { "epoch": 0.31767955801104975, "grad_norm": 1.4271197869675234e-05, "learning_rate": 0.00026826657458563533, "loss": 0.0, "step": 920 }, { "epoch": 0.32113259668508287, "grad_norm": 1.4018479305377696e-05, "learning_rate": 0.000267921270718232, "loss": 0.0, "step": 930 }, { "epoch": 0.324585635359116, "grad_norm": 1.3931013199908193e-05, "learning_rate": 0.0002675759668508287, "loss": 0.0, "step": 940 }, { "epoch": 0.3280386740331492, "grad_norm": 1.3679036783287302e-05, "learning_rate": 0.0002672306629834254, "loss": 0.0, "step": 950 }, { "epoch": 0.3314917127071823, "grad_norm": 1.3464121366268955e-05, "learning_rate": 0.0002668853591160221, "loss": 0.0, "step": 960 }, { "epoch": 0.3349447513812155, "grad_norm": 1.321551371802343e-05, "learning_rate": 0.00026654005524861873, "loss": 0.0, "step": 970 }, { "epoch": 0.3383977900552486, "grad_norm": 1.3039380974078085e-05, "learning_rate": 0.0002661947513812154, "loss": 0.0, "step": 980 }, { "epoch": 0.34185082872928174, "grad_norm": 1.2868403246102389e-05, "learning_rate": 0.00026584944751381216, "loss": 0.0, "step": 990 }, { "epoch": 0.3453038674033149, "grad_norm": 1.2609441910171881e-05, "learning_rate": 0.0002655041436464088, "loss": 0.0, "step": 1000 }, { "epoch": 0.34875690607734805, "grad_norm": 1.2483363207138609e-05, "learning_rate": 0.0002651588397790055, "loss": 0.0, "step": 1010 }, { "epoch": 0.35220994475138123, "grad_norm": 1.2320079804339912e-05, "learning_rate": 0.0002648135359116022, "loss": 0.0, "step": 1020 }, { "epoch": 0.35566298342541436, "grad_norm": 1.2183991202618927e-05, "learning_rate": 0.00026446823204419887, "loss": 0.0, "step": 1030 }, { "epoch": 0.35911602209944754, "grad_norm": 1.1978218026342802e-05, "learning_rate": 0.00026412292817679556, "loss": 0.0, "step": 1040 }, { "epoch": 0.36256906077348067, "grad_norm": 1.1780178283515852e-05, "learning_rate": 0.00026377762430939225, "loss": 0.0, "step": 1050 }, { "epoch": 0.3660220994475138, "grad_norm": 1.1626724699453916e-05, "learning_rate": 0.00026343232044198894, "loss": 0.0, "step": 1060 }, { "epoch": 0.369475138121547, "grad_norm": 1.1469895980553702e-05, "learning_rate": 0.0002630870165745856, "loss": 0.0, "step": 1070 }, { "epoch": 0.3729281767955801, "grad_norm": 1.1300082405796275e-05, "learning_rate": 0.0002627417127071823, "loss": 0.0, "step": 1080 }, { "epoch": 0.3763812154696133, "grad_norm": 1.1115320376120508e-05, "learning_rate": 0.000262396408839779, "loss": 0.0, "step": 1090 }, { "epoch": 0.3798342541436464, "grad_norm": 1.0973885764542501e-05, "learning_rate": 0.00026205110497237564, "loss": 0.0, "step": 1100 }, { "epoch": 0.38328729281767954, "grad_norm": 1.0806589671119582e-05, "learning_rate": 0.00026170580110497233, "loss": 0.0, "step": 1110 }, { "epoch": 0.3867403314917127, "grad_norm": 1.067289304046426e-05, "learning_rate": 0.000261360497237569, "loss": 0.0, "step": 1120 }, { "epoch": 0.39019337016574585, "grad_norm": 1.0515844223846216e-05, "learning_rate": 0.0002610151933701657, "loss": 0.0, "step": 1130 }, { "epoch": 0.393646408839779, "grad_norm": 1.0407337867945898e-05, "learning_rate": 0.0002606698895027624, "loss": 0.0, "step": 1140 }, { "epoch": 0.39709944751381215, "grad_norm": 1.0311589903722052e-05, "learning_rate": 0.0002603245856353591, "loss": 0.0, "step": 1150 }, { "epoch": 0.4005524861878453, "grad_norm": 1.0220475815003738e-05, "learning_rate": 0.0002599792817679558, "loss": 0.0, "step": 1160 }, { "epoch": 0.40400552486187846, "grad_norm": 1.0054594895336777e-05, "learning_rate": 0.00025963397790055247, "loss": 0.0, "step": 1170 }, { "epoch": 0.4074585635359116, "grad_norm": 1.0019272849604022e-05, "learning_rate": 0.00025928867403314916, "loss": 0.0, "step": 1180 }, { "epoch": 0.41091160220994477, "grad_norm": 9.847994078882039e-06, "learning_rate": 0.00025894337016574585, "loss": 0.0, "step": 1190 }, { "epoch": 0.4143646408839779, "grad_norm": 9.67161849985132e-06, "learning_rate": 0.0002585980662983425, "loss": 0.0, "step": 1200 }, { "epoch": 0.417817679558011, "grad_norm": 9.50271623878507e-06, "learning_rate": 0.00025825276243093923, "loss": 0.0, "step": 1210 }, { "epoch": 0.4212707182320442, "grad_norm": 9.40611516853096e-06, "learning_rate": 0.00025790745856353587, "loss": 0.0, "step": 1220 }, { "epoch": 0.42472375690607733, "grad_norm": 9.328221494797617e-06, "learning_rate": 0.00025756215469613256, "loss": 0.0, "step": 1230 }, { "epoch": 0.4281767955801105, "grad_norm": 9.182170288113412e-06, "learning_rate": 0.00025721685082872925, "loss": 0.0, "step": 1240 }, { "epoch": 0.43162983425414364, "grad_norm": 9.046670129464474e-06, "learning_rate": 0.00025687154696132594, "loss": 0.0, "step": 1250 }, { "epoch": 0.4350828729281768, "grad_norm": 8.954982149589341e-06, "learning_rate": 0.0002565262430939226, "loss": 0.0, "step": 1260 }, { "epoch": 0.43853591160220995, "grad_norm": 8.881350368028507e-06, "learning_rate": 0.0002561809392265193, "loss": 0.0, "step": 1270 }, { "epoch": 0.4419889502762431, "grad_norm": 8.688957677804865e-06, "learning_rate": 0.000255835635359116, "loss": 0.0, "step": 1280 }, { "epoch": 0.44544198895027626, "grad_norm": 8.607896234025247e-06, "learning_rate": 0.0002554903314917127, "loss": 0.0, "step": 1290 }, { "epoch": 0.4488950276243094, "grad_norm": 8.515012268617284e-06, "learning_rate": 0.0002551450276243094, "loss": 0.0, "step": 1300 }, { "epoch": 0.45234806629834257, "grad_norm": 8.418283869104926e-06, "learning_rate": 0.0002547997237569061, "loss": 0.0, "step": 1310 }, { "epoch": 0.4558011049723757, "grad_norm": 8.24214566819137e-06, "learning_rate": 0.0002544544198895027, "loss": 0.0, "step": 1320 }, { "epoch": 0.4592541436464088, "grad_norm": 8.122386134346016e-06, "learning_rate": 0.0002541091160220994, "loss": 0.0, "step": 1330 }, { "epoch": 0.462707182320442, "grad_norm": 8.03600050858222e-06, "learning_rate": 0.00025376381215469615, "loss": 0.0, "step": 1340 }, { "epoch": 0.4661602209944751, "grad_norm": 7.947605809022207e-06, "learning_rate": 0.0002534185082872928, "loss": 0.0, "step": 1350 }, { "epoch": 0.4696132596685083, "grad_norm": 7.884407750680111e-06, "learning_rate": 0.00025307320441988947, "loss": 0.0, "step": 1360 }, { "epoch": 0.47306629834254144, "grad_norm": 7.804223059793003e-06, "learning_rate": 0.00025272790055248616, "loss": 0.0, "step": 1370 }, { "epoch": 0.47651933701657456, "grad_norm": 7.734531209280249e-06, "learning_rate": 0.00025238259668508285, "loss": 0.0, "step": 1380 }, { "epoch": 0.47997237569060774, "grad_norm": 7.716504114796408e-06, "learning_rate": 0.00025203729281767954, "loss": 0.0, "step": 1390 }, { "epoch": 0.48342541436464087, "grad_norm": 7.5548778113443404e-06, "learning_rate": 0.00025169198895027623, "loss": 0.0, "step": 1400 }, { "epoch": 0.48687845303867405, "grad_norm": 7.528684818680631e-06, "learning_rate": 0.0002513466850828729, "loss": 0.0, "step": 1410 }, { "epoch": 0.4903314917127072, "grad_norm": 7.452832960552769e-06, "learning_rate": 0.00025100138121546956, "loss": 0.0, "step": 1420 }, { "epoch": 0.4937845303867403, "grad_norm": 7.327893854380818e-06, "learning_rate": 0.0002506560773480663, "loss": 0.0, "step": 1430 }, { "epoch": 0.4972375690607735, "grad_norm": 7.234123131638626e-06, "learning_rate": 0.000250310773480663, "loss": 0.0, "step": 1440 }, { "epoch": 0.5006906077348067, "grad_norm": 7.216851827251958e-06, "learning_rate": 0.0002499654696132596, "loss": 0.0, "step": 1450 }, { "epoch": 0.5041436464088398, "grad_norm": 7.072778771544108e-06, "learning_rate": 0.0002496201657458563, "loss": 0.0, "step": 1460 }, { "epoch": 0.5075966850828729, "grad_norm": 7.052627097436925e-06, "learning_rate": 0.000249274861878453, "loss": 0.0, "step": 1470 }, { "epoch": 0.511049723756906, "grad_norm": 6.952190233278088e-06, "learning_rate": 0.0002489295580110497, "loss": 0.0, "step": 1480 }, { "epoch": 0.5145027624309392, "grad_norm": 6.863036105642095e-06, "learning_rate": 0.0002485842541436464, "loss": 0.0, "step": 1490 }, { "epoch": 0.5179558011049724, "grad_norm": 6.8347253545653075e-06, "learning_rate": 0.0002482389502762431, "loss": 0.0, "step": 1500 }, { "epoch": 0.5214088397790055, "grad_norm": 6.824547199357767e-06, "learning_rate": 0.00024789364640883976, "loss": 0.0, "step": 1510 }, { "epoch": 0.5248618784530387, "grad_norm": 6.747972292941995e-06, "learning_rate": 0.00024754834254143645, "loss": 0.0, "step": 1520 }, { "epoch": 0.5283149171270718, "grad_norm": 6.725328603351954e-06, "learning_rate": 0.00024720303867403314, "loss": 0.0, "step": 1530 }, { "epoch": 0.5317679558011049, "grad_norm": 6.6485899878898636e-06, "learning_rate": 0.00024685773480662983, "loss": 0.0, "step": 1540 }, { "epoch": 0.5352209944751382, "grad_norm": 6.656896403001156e-06, "learning_rate": 0.00024651243093922647, "loss": 0.0, "step": 1550 }, { "epoch": 0.5386740331491713, "grad_norm": 6.625449259445304e-06, "learning_rate": 0.0002461671270718232, "loss": 0.0, "step": 1560 }, { "epoch": 0.5421270718232044, "grad_norm": 6.595276317966636e-06, "learning_rate": 0.00024582182320441985, "loss": 0.0, "step": 1570 }, { "epoch": 0.5455801104972375, "grad_norm": 6.594296337425476e-06, "learning_rate": 0.00024547651933701654, "loss": 0.0, "step": 1580 }, { "epoch": 0.5490331491712708, "grad_norm": 6.541635684698122e-06, "learning_rate": 0.00024513121546961323, "loss": 0.0, "step": 1590 }, { "epoch": 0.5524861878453039, "grad_norm": 6.5000008362403605e-06, "learning_rate": 0.0002447859116022099, "loss": 0.0, "step": 1600 }, { "epoch": 0.555939226519337, "grad_norm": 6.519822818518151e-06, "learning_rate": 0.0002444406077348066, "loss": 0.0, "step": 1610 }, { "epoch": 0.5593922651933702, "grad_norm": 6.442455287469784e-06, "learning_rate": 0.00024409530386740327, "loss": 0.0, "step": 1620 }, { "epoch": 0.5628453038674033, "grad_norm": 6.476571797975339e-06, "learning_rate": 0.00024375, "loss": 0.0, "step": 1630 }, { "epoch": 0.5662983425414365, "grad_norm": 6.325207323243376e-06, "learning_rate": 0.00024340469613259668, "loss": 0.0, "step": 1640 }, { "epoch": 0.5697513812154696, "grad_norm": 6.294842478382634e-06, "learning_rate": 0.00024305939226519334, "loss": 0.0, "step": 1650 }, { "epoch": 0.5732044198895028, "grad_norm": 6.158011274237651e-06, "learning_rate": 0.00024271408839779003, "loss": 0.0, "step": 1660 }, { "epoch": 0.5766574585635359, "grad_norm": 6.166576440591598e-06, "learning_rate": 0.0002423687845303867, "loss": 0.0, "step": 1670 }, { "epoch": 0.580110497237569, "grad_norm": 6.007679985486902e-06, "learning_rate": 0.0002420234806629834, "loss": 0.0, "step": 1680 }, { "epoch": 0.5835635359116023, "grad_norm": 5.934072760283016e-06, "learning_rate": 0.0002416781767955801, "loss": 0.0, "step": 1690 }, { "epoch": 0.5870165745856354, "grad_norm": 5.792816409666557e-06, "learning_rate": 0.00024133287292817676, "loss": 0.0, "step": 1700 }, { "epoch": 0.5904696132596685, "grad_norm": 5.65560867471504e-06, "learning_rate": 0.00024098756906077345, "loss": 0.0, "step": 1710 }, { "epoch": 0.5939226519337016, "grad_norm": 5.680771664628992e-06, "learning_rate": 0.00024064226519337014, "loss": 0.0, "step": 1720 }, { "epoch": 0.5973756906077348, "grad_norm": 5.607774255622644e-06, "learning_rate": 0.00024029696132596683, "loss": 0.0, "step": 1730 }, { "epoch": 0.600828729281768, "grad_norm": 5.581618552241707e-06, "learning_rate": 0.00023995165745856352, "loss": 0.0, "step": 1740 }, { "epoch": 0.6042817679558011, "grad_norm": 5.52276969756349e-06, "learning_rate": 0.00023960635359116019, "loss": 0.0, "step": 1750 }, { "epoch": 0.6077348066298343, "grad_norm": 5.448110641737003e-06, "learning_rate": 0.0002392610497237569, "loss": 0.0, "step": 1760 }, { "epoch": 0.6111878453038674, "grad_norm": 5.416258318291511e-06, "learning_rate": 0.00023891574585635357, "loss": 0.0, "step": 1770 }, { "epoch": 0.6146408839779005, "grad_norm": 5.4046581681177486e-06, "learning_rate": 0.00023857044198895026, "loss": 0.0, "step": 1780 }, { "epoch": 0.6180939226519337, "grad_norm": 5.358014277589973e-06, "learning_rate": 0.00023822513812154695, "loss": 0.0, "step": 1790 }, { "epoch": 0.6215469613259669, "grad_norm": 5.296788913256023e-06, "learning_rate": 0.0002378798342541436, "loss": 0.0, "step": 1800 }, { "epoch": 0.625, "grad_norm": 5.28964119439479e-06, "learning_rate": 0.00023753453038674033, "loss": 0.0, "step": 1810 }, { "epoch": 0.6284530386740331, "grad_norm": 5.256103577266913e-06, "learning_rate": 0.000237189226519337, "loss": 0.0, "step": 1820 }, { "epoch": 0.6319060773480663, "grad_norm": 5.256808890408138e-06, "learning_rate": 0.00023684392265193368, "loss": 0.0, "step": 1830 }, { "epoch": 0.6353591160220995, "grad_norm": 5.202196007303428e-06, "learning_rate": 0.00023649861878453037, "loss": 0.0, "step": 1840 }, { "epoch": 0.6388121546961326, "grad_norm": 5.1987703955092e-06, "learning_rate": 0.00023615331491712706, "loss": 0.0, "step": 1850 }, { "epoch": 0.6422651933701657, "grad_norm": 5.191785930946935e-06, "learning_rate": 0.00023580801104972375, "loss": 0.0, "step": 1860 }, { "epoch": 0.6457182320441989, "grad_norm": 5.154100563231623e-06, "learning_rate": 0.0002354627071823204, "loss": 0.0, "step": 1870 }, { "epoch": 0.649171270718232, "grad_norm": 5.108336608827813e-06, "learning_rate": 0.0002351174033149171, "loss": 0.0, "step": 1880 }, { "epoch": 0.6526243093922652, "grad_norm": 5.074854016129393e-06, "learning_rate": 0.00023477209944751382, "loss": 0.0, "step": 1890 }, { "epoch": 0.6560773480662984, "grad_norm": 5.048243110650219e-06, "learning_rate": 0.00023442679558011048, "loss": 0.0, "step": 1900 }, { "epoch": 0.6595303867403315, "grad_norm": 4.960218575433828e-06, "learning_rate": 0.00023408149171270717, "loss": 0.0, "step": 1910 }, { "epoch": 0.6629834254143646, "grad_norm": 4.978487595508341e-06, "learning_rate": 0.00023373618784530383, "loss": 0.0, "step": 1920 }, { "epoch": 0.6664364640883977, "grad_norm": 4.924103905068478e-06, "learning_rate": 0.00023339088397790052, "loss": 0.0, "step": 1930 }, { "epoch": 0.669889502762431, "grad_norm": 4.864195943810046e-06, "learning_rate": 0.00023304558011049724, "loss": 0.0, "step": 1940 }, { "epoch": 0.6733425414364641, "grad_norm": 4.846776391786989e-06, "learning_rate": 0.0002327002762430939, "loss": 0.0, "step": 1950 }, { "epoch": 0.6767955801104972, "grad_norm": 4.773239197675139e-06, "learning_rate": 0.0002323549723756906, "loss": 0.0, "step": 1960 }, { "epoch": 0.6802486187845304, "grad_norm": 4.78357151223463e-06, "learning_rate": 0.00023200966850828725, "loss": 0.0, "step": 1970 }, { "epoch": 0.6837016574585635, "grad_norm": 4.688682111009257e-06, "learning_rate": 0.00023166436464088397, "loss": 0.0, "step": 1980 }, { "epoch": 0.6871546961325967, "grad_norm": 4.694340987043688e-06, "learning_rate": 0.00023131906077348066, "loss": 0.0, "step": 1990 }, { "epoch": 0.6906077348066298, "grad_norm": 4.659832029574318e-06, "learning_rate": 0.00023097375690607732, "loss": 0.0, "step": 2000 }, { "epoch": 0.694060773480663, "grad_norm": 4.641542091121664e-06, "learning_rate": 0.00023062845303867401, "loss": 0.0, "step": 2010 }, { "epoch": 0.6975138121546961, "grad_norm": 4.621041625796352e-06, "learning_rate": 0.00023028314917127068, "loss": 0.0, "step": 2020 }, { "epoch": 0.7009668508287292, "grad_norm": 4.511457518674433e-06, "learning_rate": 0.0002299378453038674, "loss": 0.0, "step": 2030 }, { "epoch": 0.7044198895027625, "grad_norm": 4.469067789614201e-06, "learning_rate": 0.00022959254143646408, "loss": 0.0, "step": 2040 }, { "epoch": 0.7078729281767956, "grad_norm": 4.468557108339155e-06, "learning_rate": 0.00022924723756906075, "loss": 0.0, "step": 2050 }, { "epoch": 0.7113259668508287, "grad_norm": 4.388516572362278e-06, "learning_rate": 0.00022890193370165744, "loss": 0.0, "step": 2060 }, { "epoch": 0.7147790055248618, "grad_norm": 4.380814061732963e-06, "learning_rate": 0.0002285566298342541, "loss": 0.0, "step": 2070 }, { "epoch": 0.7182320441988951, "grad_norm": 4.330190222390229e-06, "learning_rate": 0.00022821132596685082, "loss": 0.0, "step": 2080 }, { "epoch": 0.7216850828729282, "grad_norm": 4.303632067603758e-06, "learning_rate": 0.0002278660220994475, "loss": 0.0, "step": 2090 }, { "epoch": 0.7251381215469613, "grad_norm": 4.3032441681134515e-06, "learning_rate": 0.00022752071823204417, "loss": 0.0, "step": 2100 }, { "epoch": 0.7285911602209945, "grad_norm": 4.2278556975361425e-06, "learning_rate": 0.00022717541436464089, "loss": 0.0, "step": 2110 }, { "epoch": 0.7320441988950276, "grad_norm": 4.2008282434835564e-06, "learning_rate": 0.00022683011049723755, "loss": 0.0, "step": 2120 }, { "epoch": 0.7354972375690608, "grad_norm": 4.1849671106319875e-06, "learning_rate": 0.00022648480662983424, "loss": 0.0, "step": 2130 }, { "epoch": 0.738950276243094, "grad_norm": 4.139089014643105e-06, "learning_rate": 0.00022613950276243093, "loss": 0.0, "step": 2140 }, { "epoch": 0.7424033149171271, "grad_norm": 4.1413372855458874e-06, "learning_rate": 0.0002257941988950276, "loss": 0.0, "step": 2150 }, { "epoch": 0.7458563535911602, "grad_norm": 4.120540324947797e-06, "learning_rate": 0.0002254488950276243, "loss": 0.0, "step": 2160 }, { "epoch": 0.7493093922651933, "grad_norm": 4.088240075361682e-06, "learning_rate": 0.00022510359116022097, "loss": 0.0, "step": 2170 }, { "epoch": 0.7527624309392266, "grad_norm": 4.079929567524232e-06, "learning_rate": 0.00022475828729281766, "loss": 0.0, "step": 2180 }, { "epoch": 0.7562154696132597, "grad_norm": 4.000388344138628e-06, "learning_rate": 0.00022441298342541435, "loss": 0.0, "step": 2190 }, { "epoch": 0.7596685082872928, "grad_norm": 4.028375769848935e-06, "learning_rate": 0.000224067679558011, "loss": 0.0, "step": 2200 }, { "epoch": 0.763121546961326, "grad_norm": 3.9838760130805895e-06, "learning_rate": 0.00022372237569060773, "loss": 0.0, "step": 2210 }, { "epoch": 0.7665745856353591, "grad_norm": 3.980553628935013e-06, "learning_rate": 0.0002233770718232044, "loss": 0.0, "step": 2220 }, { "epoch": 0.7700276243093923, "grad_norm": 3.937139808840584e-06, "learning_rate": 0.00022303176795580108, "loss": 0.0, "step": 2230 }, { "epoch": 0.7734806629834254, "grad_norm": 3.844603725156048e-06, "learning_rate": 0.0002226864640883978, "loss": 0.0, "step": 2240 }, { "epoch": 0.7769337016574586, "grad_norm": 3.858749096252723e-06, "learning_rate": 0.00022234116022099446, "loss": 0.0, "step": 2250 }, { "epoch": 0.7803867403314917, "grad_norm": 3.8709140426362865e-06, "learning_rate": 0.00022199585635359115, "loss": 0.0, "step": 2260 }, { "epoch": 0.7838397790055248, "grad_norm": 3.834218205156503e-06, "learning_rate": 0.00022165055248618782, "loss": 0.0, "step": 2270 }, { "epoch": 0.787292817679558, "grad_norm": 3.834566996374633e-06, "learning_rate": 0.0002213052486187845, "loss": 0.0, "step": 2280 }, { "epoch": 0.7907458563535912, "grad_norm": 3.808058636423084e-06, "learning_rate": 0.00022095994475138122, "loss": 0.0, "step": 2290 }, { "epoch": 0.7941988950276243, "grad_norm": 3.798460511461599e-06, "learning_rate": 0.00022061464088397788, "loss": 0.0, "step": 2300 }, { "epoch": 0.7976519337016574, "grad_norm": 3.7754091408714885e-06, "learning_rate": 0.00022026933701657457, "loss": 0.0, "step": 2310 }, { "epoch": 0.8011049723756906, "grad_norm": 3.7456900372490054e-06, "learning_rate": 0.00021992403314917124, "loss": 0.0, "step": 2320 }, { "epoch": 0.8045580110497238, "grad_norm": 3.662698873085901e-06, "learning_rate": 0.00021957872928176793, "loss": 0.0, "step": 2330 }, { "epoch": 0.8080110497237569, "grad_norm": 3.647770881798351e-06, "learning_rate": 0.00021923342541436462, "loss": 0.0, "step": 2340 }, { "epoch": 0.81146408839779, "grad_norm": 3.691965957841603e-06, "learning_rate": 0.0002188881215469613, "loss": 0.0, "step": 2350 }, { "epoch": 0.8149171270718232, "grad_norm": 3.6197145618643845e-06, "learning_rate": 0.000218542817679558, "loss": 0.0, "step": 2360 }, { "epoch": 0.8183701657458563, "grad_norm": 3.6086605632590363e-06, "learning_rate": 0.00021819751381215466, "loss": 0.0, "step": 2370 }, { "epoch": 0.8218232044198895, "grad_norm": 3.5874443256034283e-06, "learning_rate": 0.00021785220994475138, "loss": 0.0, "step": 2380 }, { "epoch": 0.8252762430939227, "grad_norm": 3.514939635351766e-06, "learning_rate": 0.00021750690607734804, "loss": 0.0, "step": 2390 }, { "epoch": 0.8287292817679558, "grad_norm": 3.4948482152685756e-06, "learning_rate": 0.00021716160220994473, "loss": 0.0, "step": 2400 }, { "epoch": 0.8321823204419889, "grad_norm": 3.4897475416073576e-06, "learning_rate": 0.00021681629834254142, "loss": 0.0, "step": 2410 }, { "epoch": 0.835635359116022, "grad_norm": 3.5255850434623426e-06, "learning_rate": 0.00021647099447513808, "loss": 0.0, "step": 2420 }, { "epoch": 0.8390883977900553, "grad_norm": 3.4310128285142127e-06, "learning_rate": 0.0002161256906077348, "loss": 0.0, "step": 2430 }, { "epoch": 0.8425414364640884, "grad_norm": 3.45257240041974e-06, "learning_rate": 0.00021578038674033146, "loss": 0.0, "step": 2440 }, { "epoch": 0.8459944751381215, "grad_norm": 3.380175030542887e-06, "learning_rate": 0.00021543508287292815, "loss": 0.0, "step": 2450 }, { "epoch": 0.8494475138121547, "grad_norm": 3.3774360872484976e-06, "learning_rate": 0.00021508977900552484, "loss": 0.0, "step": 2460 }, { "epoch": 0.8529005524861878, "grad_norm": 3.3334117688355036e-06, "learning_rate": 0.00021474447513812153, "loss": 0.0, "step": 2470 }, { "epoch": 0.856353591160221, "grad_norm": 3.320184532640269e-06, "learning_rate": 0.00021439917127071822, "loss": 0.0, "step": 2480 }, { "epoch": 0.8598066298342542, "grad_norm": 3.3033059025910916e-06, "learning_rate": 0.00021405386740331488, "loss": 0.0, "step": 2490 }, { "epoch": 0.8632596685082873, "grad_norm": 3.3429071208956884e-06, "learning_rate": 0.00021370856353591157, "loss": 0.0, "step": 2500 }, { "epoch": 0.8667127071823204, "grad_norm": 3.322129259686335e-06, "learning_rate": 0.0002133632596685083, "loss": 0.0, "step": 2510 }, { "epoch": 0.8701657458563536, "grad_norm": 3.2624691357341362e-06, "learning_rate": 0.00021301795580110495, "loss": 0.0, "step": 2520 }, { "epoch": 0.8736187845303868, "grad_norm": 3.2443294912809506e-06, "learning_rate": 0.00021267265193370164, "loss": 0.0, "step": 2530 }, { "epoch": 0.8770718232044199, "grad_norm": 3.203938831575215e-06, "learning_rate": 0.0002123273480662983, "loss": 0.0, "step": 2540 }, { "epoch": 0.880524861878453, "grad_norm": 3.199280854460085e-06, "learning_rate": 0.000211982044198895, "loss": 0.0, "step": 2550 }, { "epoch": 0.8839779005524862, "grad_norm": 3.1772867714607855e-06, "learning_rate": 0.0002116367403314917, "loss": 0.0, "step": 2560 }, { "epoch": 0.8874309392265194, "grad_norm": 3.179902250849409e-06, "learning_rate": 0.00021129143646408838, "loss": 0.0, "step": 2570 }, { "epoch": 0.8908839779005525, "grad_norm": 3.181563215548522e-06, "learning_rate": 0.00021094613259668507, "loss": 0.0, "step": 2580 }, { "epoch": 0.8943370165745856, "grad_norm": 3.1959646094037453e-06, "learning_rate": 0.00021060082872928173, "loss": 0.0, "step": 2590 }, { "epoch": 0.8977900552486188, "grad_norm": 3.192745907654171e-06, "learning_rate": 0.00021025552486187845, "loss": 0.0, "step": 2600 }, { "epoch": 0.9012430939226519, "grad_norm": 3.15693864649802e-06, "learning_rate": 0.00020991022099447514, "loss": 0.0, "step": 2610 }, { "epoch": 0.9046961325966851, "grad_norm": 3.1329618650488555e-06, "learning_rate": 0.0002095649171270718, "loss": 0.0, "step": 2620 }, { "epoch": 0.9081491712707183, "grad_norm": 3.1028016564960126e-06, "learning_rate": 0.0002092196132596685, "loss": 0.0, "step": 2630 }, { "epoch": 0.9116022099447514, "grad_norm": 3.0701830837642774e-06, "learning_rate": 0.00020887430939226515, "loss": 0.0, "step": 2640 }, { "epoch": 0.9150552486187845, "grad_norm": 3.0590676942665596e-06, "learning_rate": 0.00020852900552486187, "loss": 0.0, "step": 2650 }, { "epoch": 0.9185082872928176, "grad_norm": 3.0405358302232344e-06, "learning_rate": 0.00020818370165745856, "loss": 0.0, "step": 2660 }, { "epoch": 0.9219613259668509, "grad_norm": 3.058064066863153e-06, "learning_rate": 0.00020783839779005522, "loss": 0.0, "step": 2670 }, { "epoch": 0.925414364640884, "grad_norm": 3.0166024771460798e-06, "learning_rate": 0.0002074930939226519, "loss": 0.0, "step": 2680 }, { "epoch": 0.9288674033149171, "grad_norm": 2.9608536351588555e-06, "learning_rate": 0.00020714779005524857, "loss": 0.0, "step": 2690 }, { "epoch": 0.9323204419889503, "grad_norm": 2.9063160127407173e-06, "learning_rate": 0.0002068024861878453, "loss": 0.0, "step": 2700 }, { "epoch": 0.9357734806629834, "grad_norm": 2.8341280540189473e-06, "learning_rate": 0.00020645718232044198, "loss": 0.0, "step": 2710 }, { "epoch": 0.9392265193370166, "grad_norm": 2.8546271551022073e-06, "learning_rate": 0.00020611187845303864, "loss": 0.0, "step": 2720 }, { "epoch": 0.9426795580110497, "grad_norm": 2.8830058909079526e-06, "learning_rate": 0.00020576657458563536, "loss": 0.0, "step": 2730 }, { "epoch": 0.9461325966850829, "grad_norm": 2.8231258966116e-06, "learning_rate": 0.00020542127071823202, "loss": 0.0, "step": 2740 }, { "epoch": 0.949585635359116, "grad_norm": 2.7639855488814646e-06, "learning_rate": 0.0002050759668508287, "loss": 0.0, "step": 2750 }, { "epoch": 0.9530386740331491, "grad_norm": 2.7595244773692684e-06, "learning_rate": 0.0002047306629834254, "loss": 0.0, "step": 2760 }, { "epoch": 0.9564917127071824, "grad_norm": 2.726336333580548e-06, "learning_rate": 0.00020438535911602206, "loss": 0.0, "step": 2770 }, { "epoch": 0.9599447513812155, "grad_norm": 2.7349974516255315e-06, "learning_rate": 0.00020404005524861878, "loss": 0.0, "step": 2780 }, { "epoch": 0.9633977900552486, "grad_norm": 2.721001692407299e-06, "learning_rate": 0.00020369475138121544, "loss": 0.0, "step": 2790 }, { "epoch": 0.9668508287292817, "grad_norm": 2.675122004802688e-06, "learning_rate": 0.00020334944751381213, "loss": 0.0, "step": 2800 }, { "epoch": 0.9703038674033149, "grad_norm": 2.745374558799085e-06, "learning_rate": 0.00020300414364640882, "loss": 0.0, "step": 2810 }, { "epoch": 0.9737569060773481, "grad_norm": 2.63419155999145e-06, "learning_rate": 0.0002026588397790055, "loss": 0.0, "step": 2820 }, { "epoch": 0.9772099447513812, "grad_norm": 2.653710225786199e-06, "learning_rate": 0.0002023135359116022, "loss": 0.0, "step": 2830 }, { "epoch": 0.9806629834254144, "grad_norm": 2.622340389279998e-06, "learning_rate": 0.00020196823204419887, "loss": 0.0, "step": 2840 }, { "epoch": 0.9841160220994475, "grad_norm": 2.605056806714856e-06, "learning_rate": 0.00020162292817679556, "loss": 0.0, "step": 2850 }, { "epoch": 0.9875690607734806, "grad_norm": 2.6075733785546618e-06, "learning_rate": 0.00020127762430939227, "loss": 0.0, "step": 2860 }, { "epoch": 0.9910220994475138, "grad_norm": 2.595723344711587e-06, "learning_rate": 0.00020093232044198894, "loss": 0.0, "step": 2870 }, { "epoch": 0.994475138121547, "grad_norm": 2.554031652834965e-06, "learning_rate": 0.00020058701657458563, "loss": 0.0, "step": 2880 }, { "epoch": 0.9979281767955801, "grad_norm": 2.550709496063064e-06, "learning_rate": 0.0002002417127071823, "loss": 0.0, "step": 2890 }, { "epoch": 1.0, "eval_loss": 1.6920024847877357e-07, "eval_runtime": 148.7161, "eval_samples_per_second": 155.78, "eval_steps_per_second": 4.868, "step": 2896 }, { "epoch": 1.0013812154696133, "grad_norm": 2.551917759774369e-06, "learning_rate": 0.00019989640883977898, "loss": 0.0, "step": 2900 }, { "epoch": 1.0048342541436464, "grad_norm": 2.541682988521643e-06, "learning_rate": 0.0001995511049723757, "loss": 0.0, "step": 2910 }, { "epoch": 1.0082872928176796, "grad_norm": 2.512448645575205e-06, "learning_rate": 0.00019920580110497236, "loss": 0.0, "step": 2920 }, { "epoch": 1.0117403314917126, "grad_norm": 2.558298319854657e-06, "learning_rate": 0.00019886049723756905, "loss": 0.0, "step": 2930 }, { "epoch": 1.0151933701657458, "grad_norm": 2.49071331381856e-06, "learning_rate": 0.0001985151933701657, "loss": 0.0, "step": 2940 }, { "epoch": 1.018646408839779, "grad_norm": 2.449503881507553e-06, "learning_rate": 0.0001981698895027624, "loss": 0.0, "step": 2950 }, { "epoch": 1.022099447513812, "grad_norm": 2.4836267584760208e-06, "learning_rate": 0.00019782458563535912, "loss": 0.0, "step": 2960 }, { "epoch": 1.0255524861878453, "grad_norm": 2.4479763851559255e-06, "learning_rate": 0.00019747928176795578, "loss": 0.0, "step": 2970 }, { "epoch": 1.0290055248618784, "grad_norm": 2.4364837827306474e-06, "learning_rate": 0.00019713397790055247, "loss": 0.0, "step": 2980 }, { "epoch": 1.0324585635359116, "grad_norm": 2.4582791411376093e-06, "learning_rate": 0.00019678867403314913, "loss": 0.0, "step": 2990 }, { "epoch": 1.0359116022099448, "grad_norm": 2.445655809424352e-06, "learning_rate": 0.00019644337016574585, "loss": 0.0, "step": 3000 }, { "epoch": 1.0393646408839778, "grad_norm": 2.421367298666155e-06, "learning_rate": 0.00019609806629834254, "loss": 0.0, "step": 3010 }, { "epoch": 1.042817679558011, "grad_norm": 2.469103037583409e-06, "learning_rate": 0.0001957527624309392, "loss": 0.0, "step": 3020 }, { "epoch": 1.046270718232044, "grad_norm": 2.3875829811004223e-06, "learning_rate": 0.0001954074585635359, "loss": 0.0, "step": 3030 }, { "epoch": 1.0497237569060773, "grad_norm": 2.443147423036862e-06, "learning_rate": 0.00019506215469613256, "loss": 0.0, "step": 3040 }, { "epoch": 1.0531767955801106, "grad_norm": 2.4172045414161403e-06, "learning_rate": 0.00019471685082872927, "loss": 0.0, "step": 3050 }, { "epoch": 1.0566298342541436, "grad_norm": 2.385328116361052e-06, "learning_rate": 0.00019437154696132596, "loss": 0.0, "step": 3060 }, { "epoch": 1.0600828729281768, "grad_norm": 2.3904685804154724e-06, "learning_rate": 0.00019402624309392263, "loss": 0.0, "step": 3070 }, { "epoch": 1.06353591160221, "grad_norm": 2.3990751287783496e-06, "learning_rate": 0.00019368093922651931, "loss": 0.0, "step": 3080 }, { "epoch": 1.066988950276243, "grad_norm": 2.343699634366203e-06, "learning_rate": 0.000193335635359116, "loss": 0.0, "step": 3090 }, { "epoch": 1.0704419889502763, "grad_norm": 2.3166887785919243e-06, "learning_rate": 0.0001929903314917127, "loss": 0.0, "step": 3100 }, { "epoch": 1.0738950276243093, "grad_norm": 2.321364036106388e-06, "learning_rate": 0.00019264502762430938, "loss": 0.0, "step": 3110 }, { "epoch": 1.0773480662983426, "grad_norm": 2.352271167183062e-06, "learning_rate": 0.00019229972375690605, "loss": 0.0, "step": 3120 }, { "epoch": 1.0808011049723758, "grad_norm": 2.3332563614530955e-06, "learning_rate": 0.00019195441988950276, "loss": 0.0, "step": 3130 }, { "epoch": 1.0842541436464088, "grad_norm": 2.254008904856164e-06, "learning_rate": 0.00019160911602209943, "loss": 0.0, "step": 3140 }, { "epoch": 1.087707182320442, "grad_norm": 2.2928154521650868e-06, "learning_rate": 0.00019126381215469612, "loss": 0.0, "step": 3150 }, { "epoch": 1.091160220994475, "grad_norm": 2.2589101718040183e-06, "learning_rate": 0.0001909185082872928, "loss": 0.0, "step": 3160 }, { "epoch": 1.0946132596685083, "grad_norm": 2.2225376596907154e-06, "learning_rate": 0.00019057320441988947, "loss": 0.0, "step": 3170 }, { "epoch": 1.0980662983425415, "grad_norm": 2.246873236799729e-06, "learning_rate": 0.0001902279005524862, "loss": 0.0, "step": 3180 }, { "epoch": 1.1015193370165746, "grad_norm": 2.2194474240677664e-06, "learning_rate": 0.00018988259668508285, "loss": 0.0, "step": 3190 }, { "epoch": 1.1049723756906078, "grad_norm": 2.15764157474041e-06, "learning_rate": 0.00018953729281767954, "loss": 0.0, "step": 3200 }, { "epoch": 1.1084254143646408, "grad_norm": 2.159821406166884e-06, "learning_rate": 0.00018919198895027623, "loss": 0.0, "step": 3210 }, { "epoch": 1.111878453038674, "grad_norm": 2.1812261365994345e-06, "learning_rate": 0.00018884668508287292, "loss": 0.0, "step": 3220 }, { "epoch": 1.1153314917127073, "grad_norm": 2.1112030026415596e-06, "learning_rate": 0.0001885013812154696, "loss": 0.0, "step": 3230 }, { "epoch": 1.1187845303867403, "grad_norm": 2.117289795933175e-06, "learning_rate": 0.00018815607734806627, "loss": 0.0, "step": 3240 }, { "epoch": 1.1222375690607735, "grad_norm": 2.1229154754109913e-06, "learning_rate": 0.00018781077348066296, "loss": 0.0, "step": 3250 }, { "epoch": 1.1256906077348066, "grad_norm": 2.033530336120748e-06, "learning_rate": 0.00018746546961325968, "loss": 0.0, "step": 3260 }, { "epoch": 1.1291436464088398, "grad_norm": 2.126247181877261e-06, "learning_rate": 0.00018712016574585634, "loss": 0.0, "step": 3270 }, { "epoch": 1.132596685082873, "grad_norm": 2.0348197722341865e-06, "learning_rate": 0.00018677486187845303, "loss": 0.0, "step": 3280 }, { "epoch": 1.136049723756906, "grad_norm": 2.0451604996196693e-06, "learning_rate": 0.0001864295580110497, "loss": 0.0, "step": 3290 }, { "epoch": 1.1395027624309393, "grad_norm": 2.0936658984282985e-06, "learning_rate": 0.00018608425414364638, "loss": 0.0, "step": 3300 }, { "epoch": 1.1429558011049723, "grad_norm": 2.009546960834996e-06, "learning_rate": 0.0001857389502762431, "loss": 0.0, "step": 3310 }, { "epoch": 1.1464088397790055, "grad_norm": 1.9912622519768775e-06, "learning_rate": 0.00018539364640883976, "loss": 0.0, "step": 3320 }, { "epoch": 1.1498618784530388, "grad_norm": 2.007360080824583e-06, "learning_rate": 0.00018504834254143645, "loss": 0.0, "step": 3330 }, { "epoch": 1.1533149171270718, "grad_norm": 1.9943070128647378e-06, "learning_rate": 0.00018470303867403312, "loss": 0.0, "step": 3340 }, { "epoch": 1.156767955801105, "grad_norm": 1.9940246147598373e-06, "learning_rate": 0.00018435773480662983, "loss": 0.0, "step": 3350 }, { "epoch": 1.160220994475138, "grad_norm": 1.9698327378137037e-06, "learning_rate": 0.00018401243093922652, "loss": 0.0, "step": 3360 }, { "epoch": 1.1636740331491713, "grad_norm": 1.9968067590525607e-06, "learning_rate": 0.00018366712707182319, "loss": 0.0, "step": 3370 }, { "epoch": 1.1671270718232045, "grad_norm": 1.964869170478778e-06, "learning_rate": 0.00018332182320441988, "loss": 0.0, "step": 3380 }, { "epoch": 1.1705801104972375, "grad_norm": 1.9953483842982678e-06, "learning_rate": 0.00018297651933701654, "loss": 0.0, "step": 3390 }, { "epoch": 1.1740331491712708, "grad_norm": 1.971204710571328e-06, "learning_rate": 0.00018263121546961326, "loss": 0.0, "step": 3400 }, { "epoch": 1.1774861878453038, "grad_norm": 2.0117531676078215e-06, "learning_rate": 0.00018228591160220995, "loss": 0.0, "step": 3410 }, { "epoch": 1.180939226519337, "grad_norm": 2.0123477497691056e-06, "learning_rate": 0.0001819406077348066, "loss": 0.0, "step": 3420 }, { "epoch": 1.1843922651933703, "grad_norm": 2.0204852262395434e-06, "learning_rate": 0.0001815953038674033, "loss": 0.0, "step": 3430 }, { "epoch": 1.1878453038674033, "grad_norm": 1.998101197386859e-06, "learning_rate": 0.00018124999999999996, "loss": 0.0, "step": 3440 }, { "epoch": 1.1912983425414365, "grad_norm": 1.9668029835884226e-06, "learning_rate": 0.00018090469613259668, "loss": 0.0, "step": 3450 }, { "epoch": 1.1947513812154695, "grad_norm": 2.018127815972548e-06, "learning_rate": 0.00018055939226519337, "loss": 0.0, "step": 3460 }, { "epoch": 1.1982044198895028, "grad_norm": 2.0065115222678287e-06, "learning_rate": 0.00018021408839779003, "loss": 0.0, "step": 3470 }, { "epoch": 1.201657458563536, "grad_norm": 1.9446426904323744e-06, "learning_rate": 0.00017986878453038675, "loss": 0.0, "step": 3480 }, { "epoch": 1.205110497237569, "grad_norm": 1.9386443455005065e-06, "learning_rate": 0.0001795234806629834, "loss": 0.0, "step": 3490 }, { "epoch": 1.2085635359116023, "grad_norm": 1.931727410919848e-06, "learning_rate": 0.0001791781767955801, "loss": 0.0, "step": 3500 }, { "epoch": 1.2120165745856353, "grad_norm": 2.0012034838146064e-06, "learning_rate": 0.0001788328729281768, "loss": 0.0, "step": 3510 }, { "epoch": 1.2154696132596685, "grad_norm": 2.003999043154181e-06, "learning_rate": 0.00017848756906077345, "loss": 0.0, "step": 3520 }, { "epoch": 1.2189226519337018, "grad_norm": 1.9966651052527595e-06, "learning_rate": 0.00017814226519337017, "loss": 0.0, "step": 3530 }, { "epoch": 1.2223756906077348, "grad_norm": 1.9025841311304248e-06, "learning_rate": 0.00017779696132596683, "loss": 0.0, "step": 3540 }, { "epoch": 1.225828729281768, "grad_norm": 1.926584673128673e-06, "learning_rate": 0.00017745165745856352, "loss": 0.0, "step": 3550 }, { "epoch": 1.229281767955801, "grad_norm": 1.965726369235199e-06, "learning_rate": 0.0001771063535911602, "loss": 0.0, "step": 3560 }, { "epoch": 1.2327348066298343, "grad_norm": 1.9580220396164805e-06, "learning_rate": 0.00017676104972375687, "loss": 0.0, "step": 3570 }, { "epoch": 1.2361878453038675, "grad_norm": 1.890119392555789e-06, "learning_rate": 0.0001764157458563536, "loss": 0.0, "step": 3580 }, { "epoch": 1.2396408839779005, "grad_norm": 1.9031103875022382e-06, "learning_rate": 0.00017607044198895025, "loss": 0.0, "step": 3590 }, { "epoch": 1.2430939226519337, "grad_norm": 1.913630740091321e-06, "learning_rate": 0.00017572513812154694, "loss": 0.0, "step": 3600 }, { "epoch": 1.2465469613259668, "grad_norm": 1.949870920725516e-06, "learning_rate": 0.00017537983425414363, "loss": 0.0, "step": 3610 }, { "epoch": 1.25, "grad_norm": 1.8914669226433034e-06, "learning_rate": 0.00017503453038674032, "loss": 0.0, "step": 3620 }, { "epoch": 1.2534530386740332, "grad_norm": 1.8446913827574463e-06, "learning_rate": 0.00017468922651933701, "loss": 0.0, "step": 3630 }, { "epoch": 1.2569060773480663, "grad_norm": 1.9008897425010218e-06, "learning_rate": 0.00017434392265193368, "loss": 0.0, "step": 3640 }, { "epoch": 1.2603591160220995, "grad_norm": 1.8661962712940294e-06, "learning_rate": 0.00017399861878453037, "loss": 0.0, "step": 3650 }, { "epoch": 1.2638121546961325, "grad_norm": 1.8599467921376345e-06, "learning_rate": 0.00017365331491712708, "loss": 0.0, "step": 3660 }, { "epoch": 1.2672651933701657, "grad_norm": 1.855186042121204e-06, "learning_rate": 0.00017330801104972375, "loss": 0.0, "step": 3670 }, { "epoch": 1.270718232044199, "grad_norm": 1.83218276106345e-06, "learning_rate": 0.00017296270718232044, "loss": 0.0, "step": 3680 }, { "epoch": 1.274171270718232, "grad_norm": 1.836074602579174e-06, "learning_rate": 0.0001726174033149171, "loss": 0.0, "step": 3690 }, { "epoch": 1.2776243093922652, "grad_norm": 1.8322474488741136e-06, "learning_rate": 0.0001722720994475138, "loss": 0.0, "step": 3700 }, { "epoch": 1.2810773480662982, "grad_norm": 1.901133487081097e-06, "learning_rate": 0.00017192679558011048, "loss": 0.0, "step": 3710 }, { "epoch": 1.2845303867403315, "grad_norm": 1.8246300896862522e-06, "learning_rate": 0.00017158149171270717, "loss": 0.0, "step": 3720 }, { "epoch": 1.2879834254143647, "grad_norm": 1.8076457308779936e-06, "learning_rate": 0.00017123618784530386, "loss": 0.0, "step": 3730 }, { "epoch": 1.2914364640883977, "grad_norm": 1.7727260228639352e-06, "learning_rate": 0.00017089088397790052, "loss": 0.0, "step": 3740 }, { "epoch": 1.294889502762431, "grad_norm": 1.7647854519964312e-06, "learning_rate": 0.00017054558011049724, "loss": 0.0, "step": 3750 }, { "epoch": 1.298342541436464, "grad_norm": 1.7949029142982909e-06, "learning_rate": 0.0001702002762430939, "loss": 0.0, "step": 3760 }, { "epoch": 1.3017955801104972, "grad_norm": 1.7875547655421542e-06, "learning_rate": 0.0001698549723756906, "loss": 0.0, "step": 3770 }, { "epoch": 1.3052486187845305, "grad_norm": 1.760238205861242e-06, "learning_rate": 0.00016950966850828728, "loss": 0.0, "step": 3780 }, { "epoch": 1.3087016574585635, "grad_norm": 1.7486904653196689e-06, "learning_rate": 0.00016916436464088394, "loss": 0.0, "step": 3790 }, { "epoch": 1.3121546961325967, "grad_norm": 1.7295660654781386e-06, "learning_rate": 0.00016881906077348066, "loss": 0.0, "step": 3800 }, { "epoch": 1.3156077348066297, "grad_norm": 1.7624303154661902e-06, "learning_rate": 0.00016847375690607732, "loss": 0.0, "step": 3810 }, { "epoch": 1.319060773480663, "grad_norm": 1.7636212987781619e-06, "learning_rate": 0.000168128453038674, "loss": 0.0, "step": 3820 }, { "epoch": 1.3225138121546962, "grad_norm": 1.6811447949294234e-06, "learning_rate": 0.0001677831491712707, "loss": 0.0, "step": 3830 }, { "epoch": 1.3259668508287292, "grad_norm": 1.6850071915541776e-06, "learning_rate": 0.0001674378453038674, "loss": 0.0, "step": 3840 }, { "epoch": 1.3294198895027625, "grad_norm": 1.7096161855079117e-06, "learning_rate": 0.00016709254143646408, "loss": 0.0, "step": 3850 }, { "epoch": 1.3328729281767955, "grad_norm": 1.7006979078360018e-06, "learning_rate": 0.00016674723756906075, "loss": 0.0, "step": 3860 }, { "epoch": 1.3363259668508287, "grad_norm": 1.716667156870244e-06, "learning_rate": 0.00016640193370165743, "loss": 0.0, "step": 3870 }, { "epoch": 1.339779005524862, "grad_norm": 1.748633280840295e-06, "learning_rate": 0.00016605662983425415, "loss": 0.0, "step": 3880 }, { "epoch": 1.343232044198895, "grad_norm": 1.7325324961348088e-06, "learning_rate": 0.00016571132596685081, "loss": 0.0, "step": 3890 }, { "epoch": 1.3466850828729282, "grad_norm": 1.6648192513457616e-06, "learning_rate": 0.0001653660220994475, "loss": 0.0, "step": 3900 }, { "epoch": 1.3501381215469612, "grad_norm": 1.6344619098163093e-06, "learning_rate": 0.00016502071823204417, "loss": 0.0, "step": 3910 }, { "epoch": 1.3535911602209945, "grad_norm": 1.701748601590225e-06, "learning_rate": 0.00016467541436464086, "loss": 0.0, "step": 3920 }, { "epoch": 1.3570441988950277, "grad_norm": 1.606780301699473e-06, "learning_rate": 0.00016433011049723757, "loss": 0.0, "step": 3930 }, { "epoch": 1.3604972375690607, "grad_norm": 1.599330630597251e-06, "learning_rate": 0.00016398480662983424, "loss": 0.0, "step": 3940 }, { "epoch": 1.363950276243094, "grad_norm": 1.6267141518255812e-06, "learning_rate": 0.00016363950276243093, "loss": 0.0, "step": 3950 }, { "epoch": 1.367403314917127, "grad_norm": 1.7085745867007063e-06, "learning_rate": 0.0001632941988950276, "loss": 0.0, "step": 3960 }, { "epoch": 1.3708563535911602, "grad_norm": 1.665037984821538e-06, "learning_rate": 0.0001629488950276243, "loss": 0.0, "step": 3970 }, { "epoch": 1.3743093922651934, "grad_norm": 1.6761359802330844e-06, "learning_rate": 0.000162603591160221, "loss": 0.0, "step": 3980 }, { "epoch": 1.3777624309392265, "grad_norm": 1.6589308415859705e-06, "learning_rate": 0.00016225828729281766, "loss": 0.0, "step": 3990 }, { "epoch": 1.3812154696132597, "grad_norm": 1.6525378896403708e-06, "learning_rate": 0.00016191298342541435, "loss": 0.0, "step": 4000 }, { "epoch": 1.3846685082872927, "grad_norm": 1.596346578480734e-06, "learning_rate": 0.000161567679558011, "loss": 0.0, "step": 4010 }, { "epoch": 1.388121546961326, "grad_norm": 1.6445882238258491e-06, "learning_rate": 0.00016122237569060773, "loss": 0.0, "step": 4020 }, { "epoch": 1.3915745856353592, "grad_norm": 1.6247208804998081e-06, "learning_rate": 0.00016087707182320442, "loss": 0.0, "step": 4030 }, { "epoch": 1.3950276243093922, "grad_norm": 1.666152002144372e-06, "learning_rate": 0.00016053176795580108, "loss": 0.0, "step": 4040 }, { "epoch": 1.3984806629834254, "grad_norm": 1.6396236333093839e-06, "learning_rate": 0.00016018646408839777, "loss": 0.0, "step": 4050 }, { "epoch": 1.4019337016574585, "grad_norm": 1.6123648265420343e-06, "learning_rate": 0.00015984116022099443, "loss": 0.0, "step": 4060 }, { "epoch": 1.4053867403314917, "grad_norm": 1.6504756104041007e-06, "learning_rate": 0.00015949585635359115, "loss": 0.0, "step": 4070 }, { "epoch": 1.408839779005525, "grad_norm": 1.5786324638611404e-06, "learning_rate": 0.00015915055248618784, "loss": 0.0, "step": 4080 }, { "epoch": 1.412292817679558, "grad_norm": 1.628574864298571e-06, "learning_rate": 0.0001588052486187845, "loss": 0.0, "step": 4090 }, { "epoch": 1.4157458563535912, "grad_norm": 1.6183513480427791e-06, "learning_rate": 0.0001584599447513812, "loss": 0.0, "step": 4100 }, { "epoch": 1.4191988950276242, "grad_norm": 1.5879085140113602e-06, "learning_rate": 0.00015811464088397788, "loss": 0.0, "step": 4110 }, { "epoch": 1.4226519337016574, "grad_norm": 1.5802490906935418e-06, "learning_rate": 0.00015776933701657457, "loss": 0.0, "step": 4120 }, { "epoch": 1.4261049723756907, "grad_norm": 1.5775150359331747e-06, "learning_rate": 0.00015742403314917126, "loss": 0.0, "step": 4130 }, { "epoch": 1.4295580110497237, "grad_norm": 1.5598884601786267e-06, "learning_rate": 0.00015707872928176793, "loss": 0.0, "step": 4140 }, { "epoch": 1.433011049723757, "grad_norm": 1.573065560478426e-06, "learning_rate": 0.00015673342541436464, "loss": 0.0, "step": 4150 }, { "epoch": 1.43646408839779, "grad_norm": 1.5689798829043866e-06, "learning_rate": 0.0001563881215469613, "loss": 0.0, "step": 4160 }, { "epoch": 1.4399171270718232, "grad_norm": 1.6144003893714398e-06, "learning_rate": 0.000156042817679558, "loss": 0.0, "step": 4170 }, { "epoch": 1.4433701657458564, "grad_norm": 1.56514954596787e-06, "learning_rate": 0.00015569751381215469, "loss": 0.0, "step": 4180 }, { "epoch": 1.4468232044198894, "grad_norm": 1.594019636286248e-06, "learning_rate": 0.00015535220994475135, "loss": 0.0, "step": 4190 }, { "epoch": 1.4502762430939227, "grad_norm": 1.5502050700888503e-06, "learning_rate": 0.00015500690607734807, "loss": 0.0, "step": 4200 }, { "epoch": 1.4537292817679557, "grad_norm": 1.5818742440387723e-06, "learning_rate": 0.00015466160220994473, "loss": 0.0, "step": 4210 }, { "epoch": 1.457182320441989, "grad_norm": 1.5763005194457946e-06, "learning_rate": 0.00015431629834254142, "loss": 0.0, "step": 4220 }, { "epoch": 1.4606353591160222, "grad_norm": 1.574758698552614e-06, "learning_rate": 0.0001539709944751381, "loss": 0.0, "step": 4230 }, { "epoch": 1.4640883977900552, "grad_norm": 1.5549348972854204e-06, "learning_rate": 0.0001536256906077348, "loss": 0.0, "step": 4240 }, { "epoch": 1.4675414364640884, "grad_norm": 1.5614506310157594e-06, "learning_rate": 0.0001532803867403315, "loss": 0.0, "step": 4250 }, { "epoch": 1.4709944751381214, "grad_norm": 1.5552798231510678e-06, "learning_rate": 0.00015293508287292815, "loss": 0.0, "step": 4260 }, { "epoch": 1.4744475138121547, "grad_norm": 1.5928678749332903e-06, "learning_rate": 0.00015258977900552484, "loss": 0.0, "step": 4270 }, { "epoch": 1.477900552486188, "grad_norm": 1.5443998790942715e-06, "learning_rate": 0.00015224447513812156, "loss": 0.0, "step": 4280 }, { "epoch": 1.481353591160221, "grad_norm": 1.5274349607352633e-06, "learning_rate": 0.00015189917127071822, "loss": 0.0, "step": 4290 }, { "epoch": 1.4848066298342542, "grad_norm": 1.5842194898141315e-06, "learning_rate": 0.0001515538674033149, "loss": 0.0, "step": 4300 }, { "epoch": 1.4882596685082872, "grad_norm": 1.5240360653479001e-06, "learning_rate": 0.00015120856353591157, "loss": 0.0, "step": 4310 }, { "epoch": 1.4917127071823204, "grad_norm": 1.5172142866504146e-06, "learning_rate": 0.00015086325966850826, "loss": 0.0, "step": 4320 }, { "epoch": 1.4951657458563536, "grad_norm": 1.517162672826089e-06, "learning_rate": 0.00015051795580110498, "loss": 0.0, "step": 4330 }, { "epoch": 1.4986187845303867, "grad_norm": 1.55850511873723e-06, "learning_rate": 0.00015017265193370164, "loss": 0.0, "step": 4340 }, { "epoch": 1.50207182320442, "grad_norm": 1.5490234090975719e-06, "learning_rate": 0.00014982734806629833, "loss": 0.0, "step": 4350 }, { "epoch": 1.505524861878453, "grad_norm": 1.5344287476182217e-06, "learning_rate": 0.00014948204419889502, "loss": 0.0, "step": 4360 }, { "epoch": 1.5089779005524862, "grad_norm": 1.6017993402783759e-06, "learning_rate": 0.0001491367403314917, "loss": 0.0, "step": 4370 }, { "epoch": 1.5124309392265194, "grad_norm": 1.5482892194995657e-06, "learning_rate": 0.00014879143646408837, "loss": 0.0, "step": 4380 }, { "epoch": 1.5158839779005526, "grad_norm": 1.5139072502279305e-06, "learning_rate": 0.00014844613259668506, "loss": 0.0, "step": 4390 }, { "epoch": 1.5193370165745856, "grad_norm": 1.5773738368807244e-06, "learning_rate": 0.00014810082872928175, "loss": 0.0, "step": 4400 }, { "epoch": 1.5227900552486187, "grad_norm": 1.5696072068749345e-06, "learning_rate": 0.00014775552486187844, "loss": 0.0, "step": 4410 }, { "epoch": 1.526243093922652, "grad_norm": 1.5175635326158954e-06, "learning_rate": 0.00014741022099447513, "loss": 0.0, "step": 4420 }, { "epoch": 1.5296961325966851, "grad_norm": 1.5681947616030811e-06, "learning_rate": 0.0001470649171270718, "loss": 0.0, "step": 4430 }, { "epoch": 1.5331491712707184, "grad_norm": 1.5251923741743667e-06, "learning_rate": 0.00014671961325966849, "loss": 0.0, "step": 4440 }, { "epoch": 1.5366022099447514, "grad_norm": 1.5049783996801125e-06, "learning_rate": 0.00014637430939226518, "loss": 0.0, "step": 4450 }, { "epoch": 1.5400552486187844, "grad_norm": 1.51461881614523e-06, "learning_rate": 0.00014602900552486187, "loss": 0.0, "step": 4460 }, { "epoch": 1.5435082872928176, "grad_norm": 1.5999323750293115e-06, "learning_rate": 0.00014568370165745856, "loss": 0.0, "step": 4470 }, { "epoch": 1.5469613259668509, "grad_norm": 1.6135106761794304e-06, "learning_rate": 0.00014533839779005525, "loss": 0.0, "step": 4480 }, { "epoch": 1.5504143646408841, "grad_norm": 1.5847125496293302e-06, "learning_rate": 0.0001449930939226519, "loss": 0.0, "step": 4490 }, { "epoch": 1.5538674033149171, "grad_norm": 1.6794858765933895e-06, "learning_rate": 0.00014464779005524863, "loss": 0.0, "step": 4500 }, { "epoch": 1.5573204419889501, "grad_norm": 1.5033461977509432e-06, "learning_rate": 0.0001443024861878453, "loss": 0.0, "step": 4510 }, { "epoch": 1.5607734806629834, "grad_norm": 1.5018069916550303e-06, "learning_rate": 0.00014395718232044198, "loss": 0.0, "step": 4520 }, { "epoch": 1.5642265193370166, "grad_norm": 1.574701968820591e-06, "learning_rate": 0.00014361187845303867, "loss": 0.0, "step": 4530 }, { "epoch": 1.5676795580110499, "grad_norm": 1.5138363096411922e-06, "learning_rate": 0.00014326657458563533, "loss": 0.0, "step": 4540 }, { "epoch": 1.5711325966850829, "grad_norm": 1.552573394292267e-06, "learning_rate": 0.00014292127071823205, "loss": 0.0, "step": 4550 }, { "epoch": 1.5745856353591159, "grad_norm": 1.5406980082843802e-06, "learning_rate": 0.0001425759668508287, "loss": 0.0, "step": 4560 }, { "epoch": 1.5780386740331491, "grad_norm": 1.7172618527183658e-06, "learning_rate": 0.0001422306629834254, "loss": 0.0, "step": 4570 }, { "epoch": 1.5814917127071824, "grad_norm": 1.5834467603781377e-06, "learning_rate": 0.0001418853591160221, "loss": 0.0, "step": 4580 }, { "epoch": 1.5849447513812156, "grad_norm": 1.693696162874403e-06, "learning_rate": 0.00014154005524861878, "loss": 0.0, "step": 4590 }, { "epoch": 1.5883977900552486, "grad_norm": 1.6477898725497653e-06, "learning_rate": 0.00014119475138121547, "loss": 0.0, "step": 4600 }, { "epoch": 1.5918508287292816, "grad_norm": 1.5642473272237112e-06, "learning_rate": 0.00014084944751381216, "loss": 0.0, "step": 4610 }, { "epoch": 1.5953038674033149, "grad_norm": 1.558850271976553e-06, "learning_rate": 0.00014050414364640882, "loss": 0.0, "step": 4620 }, { "epoch": 1.598756906077348, "grad_norm": 1.52641371187201e-06, "learning_rate": 0.0001401588397790055, "loss": 0.0, "step": 4630 }, { "epoch": 1.6022099447513813, "grad_norm": 1.6745671018725261e-06, "learning_rate": 0.0001398135359116022, "loss": 0.0, "step": 4640 }, { "epoch": 1.6056629834254144, "grad_norm": 1.540334665151022e-06, "learning_rate": 0.0001394682320441989, "loss": 0.0, "step": 4650 }, { "epoch": 1.6091160220994474, "grad_norm": 1.6089327345980564e-06, "learning_rate": 0.00013912292817679558, "loss": 0.0, "step": 4660 }, { "epoch": 1.6125690607734806, "grad_norm": 1.520251657893823e-06, "learning_rate": 0.00013877762430939224, "loss": 0.0, "step": 4670 }, { "epoch": 1.6160220994475138, "grad_norm": 1.739482627272082e-06, "learning_rate": 0.00013843232044198893, "loss": 0.0, "step": 4680 }, { "epoch": 1.619475138121547, "grad_norm": 1.70287205492059e-06, "learning_rate": 0.00013808701657458562, "loss": 0.0, "step": 4690 }, { "epoch": 1.62292817679558, "grad_norm": 1.679419142419647e-06, "learning_rate": 0.0001377417127071823, "loss": 0.0, "step": 4700 }, { "epoch": 1.6263812154696131, "grad_norm": 1.540618086437462e-06, "learning_rate": 0.000137396408839779, "loss": 0.0, "step": 4710 }, { "epoch": 1.6298342541436464, "grad_norm": 1.6391597910114797e-06, "learning_rate": 0.00013705110497237567, "loss": 0.0, "step": 4720 }, { "epoch": 1.6332872928176796, "grad_norm": 1.7316962157565285e-06, "learning_rate": 0.00013670580110497236, "loss": 0.0, "step": 4730 }, { "epoch": 1.6367403314917128, "grad_norm": 1.7776802678781678e-06, "learning_rate": 0.00013636049723756905, "loss": 0.0, "step": 4740 }, { "epoch": 1.6401933701657458, "grad_norm": 1.5233330259434297e-06, "learning_rate": 0.00013601519337016574, "loss": 0.0, "step": 4750 }, { "epoch": 1.6436464088397789, "grad_norm": 1.6188846529985312e-06, "learning_rate": 0.00013566988950276243, "loss": 0.0, "step": 4760 }, { "epoch": 1.647099447513812, "grad_norm": 1.600709310878301e-06, "learning_rate": 0.00013532458563535912, "loss": 0.0, "step": 4770 }, { "epoch": 1.6505524861878453, "grad_norm": 1.6362160977223539e-06, "learning_rate": 0.00013497928176795578, "loss": 0.0, "step": 4780 }, { "epoch": 1.6540055248618786, "grad_norm": 1.689191662990197e-06, "learning_rate": 0.00013463397790055247, "loss": 0.0, "step": 4790 }, { "epoch": 1.6574585635359116, "grad_norm": 1.6848641735123238e-06, "learning_rate": 0.00013428867403314916, "loss": 0.0, "step": 4800 }, { "epoch": 1.6609116022099446, "grad_norm": 1.608653178664099e-06, "learning_rate": 0.00013394337016574585, "loss": 0.0, "step": 4810 }, { "epoch": 1.6643646408839778, "grad_norm": 1.7487448076280998e-06, "learning_rate": 0.00013359806629834254, "loss": 0.0, "step": 4820 }, { "epoch": 1.667817679558011, "grad_norm": 1.6652361409796868e-06, "learning_rate": 0.0001332527624309392, "loss": 0.0, "step": 4830 }, { "epoch": 1.6712707182320443, "grad_norm": 1.6317464996973285e-06, "learning_rate": 0.0001329074585635359, "loss": 0.0, "step": 4840 }, { "epoch": 1.6747237569060773, "grad_norm": 1.5843201026655152e-06, "learning_rate": 0.00013256215469613258, "loss": 0.0, "step": 4850 }, { "epoch": 1.6781767955801103, "grad_norm": 1.8988182546308963e-06, "learning_rate": 0.00013221685082872927, "loss": 0.0, "step": 4860 }, { "epoch": 1.6816298342541436, "grad_norm": 1.6328094716300257e-06, "learning_rate": 0.00013187154696132596, "loss": 0.0, "step": 4870 }, { "epoch": 1.6850828729281768, "grad_norm": 1.7487229797552573e-06, "learning_rate": 0.00013152624309392265, "loss": 0.0, "step": 4880 }, { "epoch": 1.68853591160221, "grad_norm": 1.720410409689066e-06, "learning_rate": 0.00013118093922651931, "loss": 0.0, "step": 4890 }, { "epoch": 1.691988950276243, "grad_norm": 1.7240305396626354e-06, "learning_rate": 0.000130835635359116, "loss": 0.0, "step": 4900 }, { "epoch": 1.695441988950276, "grad_norm": 1.6894758800845011e-06, "learning_rate": 0.0001304903314917127, "loss": 0.0, "step": 4910 }, { "epoch": 1.6988950276243093, "grad_norm": 1.7298494867645786e-06, "learning_rate": 0.00013014502762430938, "loss": 0.0, "step": 4920 }, { "epoch": 1.7023480662983426, "grad_norm": 1.6028227491915459e-06, "learning_rate": 0.00012979972375690607, "loss": 0.0, "step": 4930 }, { "epoch": 1.7058011049723758, "grad_norm": 1.6318870166287525e-06, "learning_rate": 0.00012945441988950274, "loss": 0.0, "step": 4940 }, { "epoch": 1.7092541436464088, "grad_norm": 1.7611550902074669e-06, "learning_rate": 0.00012910911602209943, "loss": 0.0, "step": 4950 }, { "epoch": 1.7127071823204418, "grad_norm": 1.8018733953795163e-06, "learning_rate": 0.00012876381215469612, "loss": 0.0, "step": 4960 }, { "epoch": 1.716160220994475, "grad_norm": 1.6675501228746725e-06, "learning_rate": 0.0001284185082872928, "loss": 0.0, "step": 4970 }, { "epoch": 1.7196132596685083, "grad_norm": 1.569672349432949e-06, "learning_rate": 0.0001280732044198895, "loss": 0.0, "step": 4980 }, { "epoch": 1.7230662983425415, "grad_norm": 1.733280555527017e-06, "learning_rate": 0.00012772790055248619, "loss": 0.0, "step": 4990 }, { "epoch": 1.7265193370165746, "grad_norm": 1.6006232499421458e-06, "learning_rate": 0.00012738259668508285, "loss": 0.0, "step": 5000 }, { "epoch": 1.7299723756906076, "grad_norm": 1.7194681731780292e-06, "learning_rate": 0.00012703729281767956, "loss": 0.0, "step": 5010 }, { "epoch": 1.7334254143646408, "grad_norm": 1.7312747786490945e-06, "learning_rate": 0.00012669198895027623, "loss": 0.0, "step": 5020 }, { "epoch": 1.736878453038674, "grad_norm": 1.782958065632556e-06, "learning_rate": 0.00012634668508287292, "loss": 0.0, "step": 5030 }, { "epoch": 1.7403314917127073, "grad_norm": 1.6982037323032273e-06, "learning_rate": 0.0001260013812154696, "loss": 0.0, "step": 5040 }, { "epoch": 1.7437845303867403, "grad_norm": 1.5225705283228308e-06, "learning_rate": 0.00012565607734806627, "loss": 0.0, "step": 5050 }, { "epoch": 1.7472375690607733, "grad_norm": 1.6995896885418915e-06, "learning_rate": 0.000125310773480663, "loss": 0.0, "step": 5060 }, { "epoch": 1.7506906077348066, "grad_norm": 1.5926112837405526e-06, "learning_rate": 0.00012496546961325965, "loss": 0.0, "step": 5070 }, { "epoch": 1.7541436464088398, "grad_norm": 1.5133963415792095e-06, "learning_rate": 0.00012462016574585634, "loss": 0.0, "step": 5080 }, { "epoch": 1.757596685082873, "grad_norm": 1.5709131275798427e-06, "learning_rate": 0.00012427486187845303, "loss": 0.0, "step": 5090 }, { "epoch": 1.761049723756906, "grad_norm": 1.6173738686120487e-06, "learning_rate": 0.00012392955801104972, "loss": 0.0, "step": 5100 }, { "epoch": 1.764502762430939, "grad_norm": 1.534387365609291e-06, "learning_rate": 0.0001235842541436464, "loss": 0.0, "step": 5110 }, { "epoch": 1.7679558011049723, "grad_norm": 1.5140748246267322e-06, "learning_rate": 0.0001232389502762431, "loss": 0.0, "step": 5120 }, { "epoch": 1.7714088397790055, "grad_norm": 1.6295621207973454e-06, "learning_rate": 0.00012289364640883976, "loss": 0.0, "step": 5130 }, { "epoch": 1.7748618784530388, "grad_norm": 1.4997949620010331e-06, "learning_rate": 0.00012254834254143645, "loss": 0.0, "step": 5140 }, { "epoch": 1.7783149171270718, "grad_norm": 1.4459902786256862e-06, "learning_rate": 0.00012220303867403314, "loss": 0.0, "step": 5150 }, { "epoch": 1.7817679558011048, "grad_norm": 1.5519017324550077e-06, "learning_rate": 0.00012185773480662983, "loss": 0.0, "step": 5160 }, { "epoch": 1.785220994475138, "grad_norm": 1.5834550595172914e-06, "learning_rate": 0.00012151243093922651, "loss": 0.0, "step": 5170 }, { "epoch": 1.7886740331491713, "grad_norm": 1.5676538396292017e-06, "learning_rate": 0.0001211671270718232, "loss": 0.0, "step": 5180 }, { "epoch": 1.7921270718232045, "grad_norm": 1.497358994129172e-06, "learning_rate": 0.00012082182320441987, "loss": 0.0, "step": 5190 }, { "epoch": 1.7955801104972375, "grad_norm": 1.4666873084934196e-06, "learning_rate": 0.00012047651933701656, "loss": 0.0, "step": 5200 }, { "epoch": 1.7990331491712708, "grad_norm": 1.3719563867198303e-06, "learning_rate": 0.00012013121546961325, "loss": 0.0, "step": 5210 }, { "epoch": 1.8024861878453038, "grad_norm": 1.4237898540159222e-06, "learning_rate": 0.00011978591160220994, "loss": 0.0, "step": 5220 }, { "epoch": 1.805939226519337, "grad_norm": 1.4269458006310742e-06, "learning_rate": 0.00011944060773480662, "loss": 0.0, "step": 5230 }, { "epoch": 1.8093922651933703, "grad_norm": 1.4356587598740589e-06, "learning_rate": 0.0001190953038674033, "loss": 0.0, "step": 5240 }, { "epoch": 1.8128453038674033, "grad_norm": 1.3991067362439935e-06, "learning_rate": 0.00011874999999999999, "loss": 0.0, "step": 5250 }, { "epoch": 1.8162983425414365, "grad_norm": 1.4796416962781223e-06, "learning_rate": 0.00011840469613259668, "loss": 0.0, "step": 5260 }, { "epoch": 1.8197513812154695, "grad_norm": 1.4037169648872805e-06, "learning_rate": 0.00011805939226519337, "loss": 0.0, "step": 5270 }, { "epoch": 1.8232044198895028, "grad_norm": 1.4359213764691958e-06, "learning_rate": 0.00011771408839779004, "loss": 0.0, "step": 5280 }, { "epoch": 1.826657458563536, "grad_norm": 1.5624937077518553e-06, "learning_rate": 0.00011736878453038673, "loss": 0.0, "step": 5290 }, { "epoch": 1.830110497237569, "grad_norm": 1.4253257631935412e-06, "learning_rate": 0.00011702348066298341, "loss": 0.0, "step": 5300 }, { "epoch": 1.8335635359116023, "grad_norm": 1.4953994877942023e-06, "learning_rate": 0.00011667817679558011, "loss": 0.0, "step": 5310 }, { "epoch": 1.8370165745856353, "grad_norm": 1.3932763067714404e-06, "learning_rate": 0.00011633287292817679, "loss": 0.0, "step": 5320 }, { "epoch": 1.8404696132596685, "grad_norm": 1.3962746834295103e-06, "learning_rate": 0.00011598756906077348, "loss": 0.0, "step": 5330 }, { "epoch": 1.8439226519337018, "grad_norm": 1.422096374881221e-06, "learning_rate": 0.00011564226519337015, "loss": 0.0, "step": 5340 }, { "epoch": 1.8473756906077348, "grad_norm": 1.3985896885060356e-06, "learning_rate": 0.00011529696132596683, "loss": 0.0, "step": 5350 }, { "epoch": 1.850828729281768, "grad_norm": 1.422858417754469e-06, "learning_rate": 0.00011495165745856353, "loss": 0.0, "step": 5360 }, { "epoch": 1.854281767955801, "grad_norm": 1.4309388234323706e-06, "learning_rate": 0.00011460635359116021, "loss": 0.0, "step": 5370 }, { "epoch": 1.8577348066298343, "grad_norm": 1.4170850590744521e-06, "learning_rate": 0.0001142610497237569, "loss": 0.0, "step": 5380 }, { "epoch": 1.8611878453038675, "grad_norm": 1.4604578382204636e-06, "learning_rate": 0.00011391574585635358, "loss": 0.0, "step": 5390 }, { "epoch": 1.8646408839779005, "grad_norm": 1.4157706118567148e-06, "learning_rate": 0.00011357044198895027, "loss": 0.0, "step": 5400 }, { "epoch": 1.8680939226519337, "grad_norm": 1.5326307902796543e-06, "learning_rate": 0.00011322513812154694, "loss": 0.0, "step": 5410 }, { "epoch": 1.8715469613259668, "grad_norm": 1.3652637562699965e-06, "learning_rate": 0.00011287983425414365, "loss": 0.0, "step": 5420 }, { "epoch": 1.875, "grad_norm": 1.4383072084456217e-06, "learning_rate": 0.00011253453038674032, "loss": 0.0, "step": 5430 }, { "epoch": 1.8784530386740332, "grad_norm": 1.391429350405815e-06, "learning_rate": 0.000112189226519337, "loss": 0.0, "step": 5440 }, { "epoch": 1.8819060773480663, "grad_norm": 1.3904237903261674e-06, "learning_rate": 0.00011184392265193369, "loss": 0.0, "step": 5450 }, { "epoch": 1.8853591160220995, "grad_norm": 1.3395949736150214e-06, "learning_rate": 0.00011149861878453037, "loss": 0.0, "step": 5460 }, { "epoch": 1.8888121546961325, "grad_norm": 1.3560588740801904e-06, "learning_rate": 0.00011115331491712707, "loss": 0.0, "step": 5470 }, { "epoch": 1.8922651933701657, "grad_norm": 1.3414514796750154e-06, "learning_rate": 0.00011080801104972374, "loss": 0.0, "step": 5480 }, { "epoch": 1.895718232044199, "grad_norm": 1.2994665894439095e-06, "learning_rate": 0.00011046270718232043, "loss": 0.0, "step": 5490 }, { "epoch": 1.899171270718232, "grad_norm": 1.3816174941894133e-06, "learning_rate": 0.00011011740331491711, "loss": 0.0, "step": 5500 }, { "epoch": 1.9026243093922652, "grad_norm": 1.3786287809125497e-06, "learning_rate": 0.0001097720994475138, "loss": 0.0, "step": 5510 }, { "epoch": 1.9060773480662982, "grad_norm": 1.3973191244076588e-06, "learning_rate": 0.00010942679558011049, "loss": 0.0, "step": 5520 }, { "epoch": 1.9095303867403315, "grad_norm": 1.2792074812750798e-06, "learning_rate": 0.00010908149171270718, "loss": 0.0, "step": 5530 }, { "epoch": 1.9129834254143647, "grad_norm": 1.3235901406005723e-06, "learning_rate": 0.00010873618784530386, "loss": 0.0, "step": 5540 }, { "epoch": 1.9164364640883977, "grad_norm": 1.280796823266428e-06, "learning_rate": 0.00010839088397790053, "loss": 0.0, "step": 5550 }, { "epoch": 1.919889502762431, "grad_norm": 1.3323832490641507e-06, "learning_rate": 0.00010804558011049722, "loss": 0.0, "step": 5560 }, { "epoch": 1.923342541436464, "grad_norm": 1.2912943248011288e-06, "learning_rate": 0.00010770027624309391, "loss": 0.0, "step": 5570 }, { "epoch": 1.9267955801104972, "grad_norm": 1.352650315311621e-06, "learning_rate": 0.0001073549723756906, "loss": 0.0, "step": 5580 }, { "epoch": 1.9302486187845305, "grad_norm": 1.2493072745201061e-06, "learning_rate": 0.00010700966850828728, "loss": 0.0, "step": 5590 }, { "epoch": 1.9337016574585635, "grad_norm": 1.3102834373057703e-06, "learning_rate": 0.00010666436464088397, "loss": 0.0, "step": 5600 }, { "epoch": 1.9371546961325967, "grad_norm": 1.2788173080480192e-06, "learning_rate": 0.00010631906077348065, "loss": 0.0, "step": 5610 }, { "epoch": 1.9406077348066297, "grad_norm": 1.2790302434950718e-06, "learning_rate": 0.00010597375690607735, "loss": 0.0, "step": 5620 }, { "epoch": 1.944060773480663, "grad_norm": 1.2219841210026061e-06, "learning_rate": 0.00010562845303867403, "loss": 0.0, "step": 5630 }, { "epoch": 1.9475138121546962, "grad_norm": 1.2949489018865279e-06, "learning_rate": 0.00010528314917127071, "loss": 0.0, "step": 5640 }, { "epoch": 1.9509668508287292, "grad_norm": 1.2215615470267949e-06, "learning_rate": 0.00010493784530386739, "loss": 0.0, "step": 5650 }, { "epoch": 1.9544198895027625, "grad_norm": 1.247333216269908e-06, "learning_rate": 0.00010459254143646407, "loss": 0.0, "step": 5660 }, { "epoch": 1.9578729281767955, "grad_norm": 1.2514155969256535e-06, "learning_rate": 0.00010424723756906077, "loss": 0.0, "step": 5670 }, { "epoch": 1.9613259668508287, "grad_norm": 1.2916174227939337e-06, "learning_rate": 0.00010390193370165745, "loss": 0.0, "step": 5680 }, { "epoch": 1.964779005524862, "grad_norm": 1.2257036132723442e-06, "learning_rate": 0.00010355662983425414, "loss": 0.0, "step": 5690 }, { "epoch": 1.9682320441988952, "grad_norm": 1.3290754168338026e-06, "learning_rate": 0.00010321132596685081, "loss": 0.0, "step": 5700 }, { "epoch": 1.9716850828729282, "grad_norm": 1.2491209417930804e-06, "learning_rate": 0.0001028660220994475, "loss": 0.0, "step": 5710 }, { "epoch": 1.9751381215469612, "grad_norm": 1.2408169141053804e-06, "learning_rate": 0.00010252071823204419, "loss": 0.0, "step": 5720 }, { "epoch": 1.9785911602209945, "grad_norm": 1.2794864687748486e-06, "learning_rate": 0.00010217541436464088, "loss": 0.0, "step": 5730 }, { "epoch": 1.9820441988950277, "grad_norm": 1.2674851177507662e-06, "learning_rate": 0.00010183011049723756, "loss": 0.0, "step": 5740 }, { "epoch": 1.985497237569061, "grad_norm": 1.2836618452638504e-06, "learning_rate": 0.00010148480662983424, "loss": 0.0, "step": 5750 }, { "epoch": 1.988950276243094, "grad_norm": 1.3393234894465422e-06, "learning_rate": 0.00010113950276243093, "loss": 0.0, "step": 5760 }, { "epoch": 1.992403314917127, "grad_norm": 1.378129240947601e-06, "learning_rate": 0.00010079419889502763, "loss": 0.0, "step": 5770 }, { "epoch": 1.9958563535911602, "grad_norm": 1.2653547400987009e-06, "learning_rate": 0.0001004488950276243, "loss": 0.0, "step": 5780 }, { "epoch": 1.9993093922651934, "grad_norm": 1.27194880406023e-06, "learning_rate": 0.00010010359116022098, "loss": 0.0, "step": 5790 }, { "epoch": 2.0, "eval_loss": 6.537283070429112e-08, "eval_runtime": 148.9408, "eval_samples_per_second": 155.545, "eval_steps_per_second": 4.861, "step": 5792 }, { "epoch": 2.0027624309392267, "grad_norm": 1.302505552303046e-06, "learning_rate": 9.975828729281767e-05, "loss": 0.0, "step": 5800 }, { "epoch": 2.0062154696132595, "grad_norm": 1.2511230806921958e-06, "learning_rate": 9.941298342541435e-05, "loss": 0.0, "step": 5810 }, { "epoch": 2.0096685082872927, "grad_norm": 1.2535533642221708e-06, "learning_rate": 9.906767955801105e-05, "loss": 0.0, "step": 5820 }, { "epoch": 2.013121546961326, "grad_norm": 1.2554449995150208e-06, "learning_rate": 9.872237569060773e-05, "loss": 0.0, "step": 5830 }, { "epoch": 2.016574585635359, "grad_norm": 1.2536825124698225e-06, "learning_rate": 9.837707182320442e-05, "loss": 0.0, "step": 5840 }, { "epoch": 2.0200276243093924, "grad_norm": 1.2861748928116867e-06, "learning_rate": 9.80317679558011e-05, "loss": 0.0, "step": 5850 }, { "epoch": 2.023480662983425, "grad_norm": 1.2421033943610382e-06, "learning_rate": 9.768646408839777e-05, "loss": 0.0, "step": 5860 }, { "epoch": 2.0269337016574585, "grad_norm": 1.270347183890408e-06, "learning_rate": 9.734116022099447e-05, "loss": 0.0, "step": 5870 }, { "epoch": 2.0303867403314917, "grad_norm": 1.2868068779425812e-06, "learning_rate": 9.699585635359115e-05, "loss": 0.0, "step": 5880 }, { "epoch": 2.033839779005525, "grad_norm": 1.2810575071853236e-06, "learning_rate": 9.665055248618784e-05, "loss": 0.0, "step": 5890 }, { "epoch": 2.037292817679558, "grad_norm": 1.3260083733257488e-06, "learning_rate": 9.630524861878452e-05, "loss": 0.0, "step": 5900 }, { "epoch": 2.040745856353591, "grad_norm": 1.3115065939928172e-06, "learning_rate": 9.59599447513812e-05, "loss": 0.0, "step": 5910 }, { "epoch": 2.044198895027624, "grad_norm": 1.322999764852284e-06, "learning_rate": 9.56146408839779e-05, "loss": 0.0, "step": 5920 }, { "epoch": 2.0476519337016574, "grad_norm": 1.2482248621381586e-06, "learning_rate": 9.526933701657459e-05, "loss": 0.0, "step": 5930 }, { "epoch": 2.0511049723756907, "grad_norm": 1.2718530797428684e-06, "learning_rate": 9.492403314917126e-05, "loss": 0.0, "step": 5940 }, { "epoch": 2.054558011049724, "grad_norm": 1.2434273912731442e-06, "learning_rate": 9.457872928176795e-05, "loss": 0.0, "step": 5950 }, { "epoch": 2.0580110497237567, "grad_norm": 1.207373088618624e-06, "learning_rate": 9.423342541436463e-05, "loss": 0.0, "step": 5960 }, { "epoch": 2.06146408839779, "grad_norm": 1.2399498245940777e-06, "learning_rate": 9.388812154696133e-05, "loss": 0.0, "step": 5970 }, { "epoch": 2.064917127071823, "grad_norm": 1.2543387128971517e-06, "learning_rate": 9.354281767955801e-05, "loss": 0.0, "step": 5980 }, { "epoch": 2.0683701657458564, "grad_norm": 1.2395491921779467e-06, "learning_rate": 9.319751381215468e-05, "loss": 0.0, "step": 5990 }, { "epoch": 2.0718232044198897, "grad_norm": 1.2791253993782448e-06, "learning_rate": 9.285220994475137e-05, "loss": 0.0, "step": 6000 }, { "epoch": 2.0752762430939224, "grad_norm": 1.222889522978221e-06, "learning_rate": 9.250690607734805e-05, "loss": 0.0, "step": 6010 }, { "epoch": 2.0787292817679557, "grad_norm": 1.2209752640046645e-06, "learning_rate": 9.216160220994475e-05, "loss": 0.0, "step": 6020 }, { "epoch": 2.082182320441989, "grad_norm": 1.1890125506397453e-06, "learning_rate": 9.181629834254143e-05, "loss": 0.0, "step": 6030 }, { "epoch": 2.085635359116022, "grad_norm": 1.221079173774342e-06, "learning_rate": 9.147099447513812e-05, "loss": 0.0, "step": 6040 }, { "epoch": 2.0890883977900554, "grad_norm": 1.1833091093649273e-06, "learning_rate": 9.11256906077348e-05, "loss": 0.0, "step": 6050 }, { "epoch": 2.092541436464088, "grad_norm": 1.1755959121728665e-06, "learning_rate": 9.078038674033147e-05, "loss": 0.0, "step": 6060 }, { "epoch": 2.0959944751381214, "grad_norm": 1.2618676237252657e-06, "learning_rate": 9.043508287292816e-05, "loss": 0.0, "step": 6070 }, { "epoch": 2.0994475138121547, "grad_norm": 1.1843758329632692e-06, "learning_rate": 9.008977900552487e-05, "loss": 0.0, "step": 6080 }, { "epoch": 2.102900552486188, "grad_norm": 1.1937182762267184e-06, "learning_rate": 8.974447513812154e-05, "loss": 0.0, "step": 6090 }, { "epoch": 2.106353591160221, "grad_norm": 1.1368030072844704e-06, "learning_rate": 8.939917127071822e-05, "loss": 0.0, "step": 6100 }, { "epoch": 2.109806629834254, "grad_norm": 1.1481572528282413e-06, "learning_rate": 8.905386740331491e-05, "loss": 0.0, "step": 6110 }, { "epoch": 2.113259668508287, "grad_norm": 1.2011357739538653e-06, "learning_rate": 8.870856353591158e-05, "loss": 0.0, "step": 6120 }, { "epoch": 2.1167127071823204, "grad_norm": 1.2087302820873447e-06, "learning_rate": 8.836325966850829e-05, "loss": 0.0, "step": 6130 }, { "epoch": 2.1201657458563536, "grad_norm": 1.3975956107969978e-06, "learning_rate": 8.801795580110496e-05, "loss": 0.0, "step": 6140 }, { "epoch": 2.123618784530387, "grad_norm": 1.173033524537459e-06, "learning_rate": 8.767265193370165e-05, "loss": 0.0, "step": 6150 }, { "epoch": 2.12707182320442, "grad_norm": 1.3078484926154488e-06, "learning_rate": 8.732734806629833e-05, "loss": 0.0, "step": 6160 }, { "epoch": 2.130524861878453, "grad_norm": 1.278099034607294e-06, "learning_rate": 8.698204419889501e-05, "loss": 0.0, "step": 6170 }, { "epoch": 2.133977900552486, "grad_norm": 1.2614267461685813e-06, "learning_rate": 8.663674033149171e-05, "loss": 0.0, "step": 6180 }, { "epoch": 2.1374309392265194, "grad_norm": 1.2256597301529837e-06, "learning_rate": 8.629143646408839e-05, "loss": 0.0, "step": 6190 }, { "epoch": 2.1408839779005526, "grad_norm": 1.211039943882497e-06, "learning_rate": 8.594613259668508e-05, "loss": 0.0, "step": 6200 }, { "epoch": 2.1443370165745854, "grad_norm": 1.259349005522381e-06, "learning_rate": 8.560082872928175e-05, "loss": 0.0, "step": 6210 }, { "epoch": 2.1477900552486187, "grad_norm": 1.248475427928497e-06, "learning_rate": 8.525552486187844e-05, "loss": 0.0, "step": 6220 }, { "epoch": 2.151243093922652, "grad_norm": 1.2817167771572713e-06, "learning_rate": 8.491022099447513e-05, "loss": 0.0, "step": 6230 }, { "epoch": 2.154696132596685, "grad_norm": 1.2208245152578456e-06, "learning_rate": 8.456491712707182e-05, "loss": 0.0, "step": 6240 }, { "epoch": 2.1581491712707184, "grad_norm": 1.2713056776192388e-06, "learning_rate": 8.42196132596685e-05, "loss": 0.0, "step": 6250 }, { "epoch": 2.1616022099447516, "grad_norm": 1.23030042686878e-06, "learning_rate": 8.387430939226519e-05, "loss": 0.0, "step": 6260 }, { "epoch": 2.1650552486187844, "grad_norm": 1.2689947652688716e-06, "learning_rate": 8.352900552486186e-05, "loss": 0.0, "step": 6270 }, { "epoch": 2.1685082872928176, "grad_norm": 1.2374957805150189e-06, "learning_rate": 8.318370165745857e-05, "loss": 0.0, "step": 6280 }, { "epoch": 2.171961325966851, "grad_norm": 1.2511869726949953e-06, "learning_rate": 8.283839779005524e-05, "loss": 0.0, "step": 6290 }, { "epoch": 2.175414364640884, "grad_norm": 1.262544174096547e-06, "learning_rate": 8.249309392265192e-05, "loss": 0.0, "step": 6300 }, { "epoch": 2.178867403314917, "grad_norm": 1.2743558954753098e-06, "learning_rate": 8.214779005524861e-05, "loss": 0.0, "step": 6310 }, { "epoch": 2.18232044198895, "grad_norm": 1.3678410368811456e-06, "learning_rate": 8.180248618784529e-05, "loss": 0.0, "step": 6320 }, { "epoch": 2.1857734806629834, "grad_norm": 1.3123024018568685e-06, "learning_rate": 8.145718232044199e-05, "loss": 0.0, "step": 6330 }, { "epoch": 2.1892265193370166, "grad_norm": 1.1941641560042626e-06, "learning_rate": 8.111187845303867e-05, "loss": 0.0, "step": 6340 }, { "epoch": 2.19267955801105, "grad_norm": 1.1700332152031478e-06, "learning_rate": 8.076657458563536e-05, "loss": 0.0, "step": 6350 }, { "epoch": 2.196132596685083, "grad_norm": 1.2660572110689827e-06, "learning_rate": 8.042127071823203e-05, "loss": 0.0, "step": 6360 }, { "epoch": 2.199585635359116, "grad_norm": 1.208563958243758e-06, "learning_rate": 8.007596685082871e-05, "loss": 0.0, "step": 6370 }, { "epoch": 2.203038674033149, "grad_norm": 1.215967472489865e-06, "learning_rate": 7.973066298342541e-05, "loss": 0.0, "step": 6380 }, { "epoch": 2.2064917127071824, "grad_norm": 1.2395387329888763e-06, "learning_rate": 7.93853591160221e-05, "loss": 0.0, "step": 6390 }, { "epoch": 2.2099447513812156, "grad_norm": 1.2592900020536035e-06, "learning_rate": 7.904005524861878e-05, "loss": 0.0, "step": 6400 }, { "epoch": 2.2133977900552484, "grad_norm": 1.2446776054275688e-06, "learning_rate": 7.869475138121546e-05, "loss": 0.0, "step": 6410 }, { "epoch": 2.2168508287292816, "grad_norm": 1.3018749314142042e-06, "learning_rate": 7.834944751381215e-05, "loss": 0.0, "step": 6420 }, { "epoch": 2.220303867403315, "grad_norm": 1.2104586630812264e-06, "learning_rate": 7.800414364640883e-05, "loss": 0.0, "step": 6430 }, { "epoch": 2.223756906077348, "grad_norm": 1.1618906228250125e-06, "learning_rate": 7.765883977900552e-05, "loss": 0.0, "step": 6440 }, { "epoch": 2.2272099447513813, "grad_norm": 1.2644270555028925e-06, "learning_rate": 7.73135359116022e-05, "loss": 0.0, "step": 6450 }, { "epoch": 2.2306629834254146, "grad_norm": 1.1858845709866728e-06, "learning_rate": 7.696823204419889e-05, "loss": 0.0, "step": 6460 }, { "epoch": 2.2341160220994474, "grad_norm": 1.2048935786879156e-06, "learning_rate": 7.662292817679557e-05, "loss": 0.0, "step": 6470 }, { "epoch": 2.2375690607734806, "grad_norm": 1.2809289273718605e-06, "learning_rate": 7.627762430939227e-05, "loss": 0.0, "step": 6480 }, { "epoch": 2.241022099447514, "grad_norm": 1.1714131460394128e-06, "learning_rate": 7.593232044198895e-05, "loss": 0.0, "step": 6490 }, { "epoch": 2.244475138121547, "grad_norm": 1.21106972983398e-06, "learning_rate": 7.558701657458562e-05, "loss": 0.0, "step": 6500 }, { "epoch": 2.24792817679558, "grad_norm": 1.1688061931636184e-06, "learning_rate": 7.524171270718231e-05, "loss": 0.0, "step": 6510 }, { "epoch": 2.251381215469613, "grad_norm": 1.2267123565834481e-06, "learning_rate": 7.4896408839779e-05, "loss": 0.0, "step": 6520 }, { "epoch": 2.2548342541436464, "grad_norm": 1.162765215667605e-06, "learning_rate": 7.455110497237568e-05, "loss": 0.0, "step": 6530 }, { "epoch": 2.2582872928176796, "grad_norm": 1.2105133464501705e-06, "learning_rate": 7.420580110497237e-05, "loss": 0.0, "step": 6540 }, { "epoch": 2.261740331491713, "grad_norm": 1.2223775911479606e-06, "learning_rate": 7.386049723756906e-05, "loss": 0.0, "step": 6550 }, { "epoch": 2.265193370165746, "grad_norm": 1.1378373301340616e-06, "learning_rate": 7.351519337016574e-05, "loss": 0.0, "step": 6560 }, { "epoch": 2.268646408839779, "grad_norm": 1.238130380443181e-06, "learning_rate": 7.316988950276243e-05, "loss": 0.0, "step": 6570 }, { "epoch": 2.272099447513812, "grad_norm": 1.264096908926149e-06, "learning_rate": 7.282458563535912e-05, "loss": 0.0, "step": 6580 }, { "epoch": 2.2755524861878453, "grad_norm": 1.1736433407349978e-06, "learning_rate": 7.247928176795579e-05, "loss": 0.0, "step": 6590 }, { "epoch": 2.2790055248618786, "grad_norm": 1.1446206826803973e-06, "learning_rate": 7.213397790055248e-05, "loss": 0.0, "step": 6600 }, { "epoch": 2.2824585635359114, "grad_norm": 1.1755490731957252e-06, "learning_rate": 7.178867403314916e-05, "loss": 0.0, "step": 6610 }, { "epoch": 2.2859116022099446, "grad_norm": 1.1966729971391032e-06, "learning_rate": 7.144337016574585e-05, "loss": 0.0, "step": 6620 }, { "epoch": 2.289364640883978, "grad_norm": 1.2044902177876793e-06, "learning_rate": 7.109806629834254e-05, "loss": 0.0, "step": 6630 }, { "epoch": 2.292817679558011, "grad_norm": 1.17203501304175e-06, "learning_rate": 7.075276243093921e-05, "loss": 0.0, "step": 6640 }, { "epoch": 2.2962707182320443, "grad_norm": 1.150072307609662e-06, "learning_rate": 7.04074585635359e-05, "loss": 0.0, "step": 6650 }, { "epoch": 2.2997237569060776, "grad_norm": 1.1563071211639908e-06, "learning_rate": 7.00621546961326e-05, "loss": 0.0, "step": 6660 }, { "epoch": 2.3031767955801103, "grad_norm": 1.2002936955468613e-06, "learning_rate": 6.971685082872928e-05, "loss": 0.0, "step": 6670 }, { "epoch": 2.3066298342541436, "grad_norm": 1.2543680441012839e-06, "learning_rate": 6.937154696132596e-05, "loss": 0.0, "step": 6680 }, { "epoch": 2.310082872928177, "grad_norm": 1.1671339734675712e-06, "learning_rate": 6.902624309392264e-05, "loss": 0.0, "step": 6690 }, { "epoch": 2.31353591160221, "grad_norm": 1.2821577684007934e-06, "learning_rate": 6.868093922651933e-05, "loss": 0.0, "step": 6700 }, { "epoch": 2.316988950276243, "grad_norm": 1.21054381452268e-06, "learning_rate": 6.833563535911602e-05, "loss": 0.0, "step": 6710 }, { "epoch": 2.320441988950276, "grad_norm": 1.2980367500858847e-06, "learning_rate": 6.79903314917127e-05, "loss": 0.0, "step": 6720 }, { "epoch": 2.3238950276243093, "grad_norm": 1.2534363804661552e-06, "learning_rate": 6.764502762430938e-05, "loss": 0.0, "step": 6730 }, { "epoch": 2.3273480662983426, "grad_norm": 1.3424089502223069e-06, "learning_rate": 6.729972375690607e-05, "loss": 0.0, "step": 6740 }, { "epoch": 2.330801104972376, "grad_norm": 1.1836312978630303e-06, "learning_rate": 6.695441988950276e-05, "loss": 0.0, "step": 6750 }, { "epoch": 2.334254143646409, "grad_norm": 1.1672119626382482e-06, "learning_rate": 6.660911602209944e-05, "loss": 0.0, "step": 6760 }, { "epoch": 2.337707182320442, "grad_norm": 1.2071262744939304e-06, "learning_rate": 6.626381215469613e-05, "loss": 0.0, "step": 6770 }, { "epoch": 2.341160220994475, "grad_norm": 1.2302369896133314e-06, "learning_rate": 6.591850828729282e-05, "loss": 0.0, "step": 6780 }, { "epoch": 2.3446132596685083, "grad_norm": 1.1881347745656967e-06, "learning_rate": 6.55732044198895e-05, "loss": 0.0, "step": 6790 }, { "epoch": 2.3480662983425415, "grad_norm": 1.2039602097502211e-06, "learning_rate": 6.522790055248618e-05, "loss": 0.0, "step": 6800 }, { "epoch": 2.3515193370165743, "grad_norm": 1.230209363711765e-06, "learning_rate": 6.488259668508286e-05, "loss": 0.0, "step": 6810 }, { "epoch": 2.3549723756906076, "grad_norm": 1.2966078202225617e-06, "learning_rate": 6.453729281767955e-05, "loss": 0.0, "step": 6820 }, { "epoch": 2.358425414364641, "grad_norm": 1.2736790040435153e-06, "learning_rate": 6.419198895027624e-05, "loss": 0.0, "step": 6830 }, { "epoch": 2.361878453038674, "grad_norm": 1.3069186479697237e-06, "learning_rate": 6.384668508287292e-05, "loss": 0.0, "step": 6840 }, { "epoch": 2.3653314917127073, "grad_norm": 1.2377799976093229e-06, "learning_rate": 6.35013812154696e-05, "loss": 0.0, "step": 6850 }, { "epoch": 2.3687845303867405, "grad_norm": 1.2712785064650234e-06, "learning_rate": 6.31560773480663e-05, "loss": 0.0, "step": 6860 }, { "epoch": 2.3722375690607733, "grad_norm": 1.18513673896814e-06, "learning_rate": 6.281077348066299e-05, "loss": 0.0, "step": 6870 }, { "epoch": 2.3756906077348066, "grad_norm": 1.3310879012351506e-06, "learning_rate": 6.246546961325966e-05, "loss": 0.0, "step": 6880 }, { "epoch": 2.37914364640884, "grad_norm": 1.2248102621015278e-06, "learning_rate": 6.212016574585635e-05, "loss": 0.0, "step": 6890 }, { "epoch": 2.382596685082873, "grad_norm": 1.2849426411776221e-06, "learning_rate": 6.177486187845304e-05, "loss": 0.0, "step": 6900 }, { "epoch": 2.3860497237569063, "grad_norm": 1.2628722743102117e-06, "learning_rate": 6.142955801104972e-05, "loss": 0.0, "step": 6910 }, { "epoch": 2.389502762430939, "grad_norm": 1.3354148222788353e-06, "learning_rate": 6.10842541436464e-05, "loss": 0.0, "step": 6920 }, { "epoch": 2.3929558011049723, "grad_norm": 1.26142731460277e-06, "learning_rate": 6.073895027624309e-05, "loss": 0.0, "step": 6930 }, { "epoch": 2.3964088397790055, "grad_norm": 1.3645210401591612e-06, "learning_rate": 6.0393646408839774e-05, "loss": 0.0, "step": 6940 }, { "epoch": 2.3998618784530388, "grad_norm": 1.306889089391916e-06, "learning_rate": 6.0048342541436464e-05, "loss": 0.0, "step": 6950 }, { "epoch": 2.403314917127072, "grad_norm": 1.3817385706715868e-06, "learning_rate": 5.970303867403314e-05, "loss": 0.0, "step": 6960 }, { "epoch": 2.406767955801105, "grad_norm": 1.3270599765746738e-06, "learning_rate": 5.9357734806629824e-05, "loss": 0.0, "step": 6970 }, { "epoch": 2.410220994475138, "grad_norm": 1.329727069787623e-06, "learning_rate": 5.9012430939226513e-05, "loss": 0.0, "step": 6980 }, { "epoch": 2.4136740331491713, "grad_norm": 1.346269755231333e-06, "learning_rate": 5.8667127071823196e-05, "loss": 0.0, "step": 6990 }, { "epoch": 2.4171270718232045, "grad_norm": 1.3696923133466044e-06, "learning_rate": 5.8321823204419886e-05, "loss": 0.0, "step": 7000 }, { "epoch": 2.4205801104972378, "grad_norm": 1.2548185850391746e-06, "learning_rate": 5.797651933701657e-05, "loss": 0.0, "step": 7010 }, { "epoch": 2.4240331491712706, "grad_norm": 1.346151293546427e-06, "learning_rate": 5.763121546961325e-05, "loss": 0.0, "step": 7020 }, { "epoch": 2.427486187845304, "grad_norm": 1.4272437738327426e-06, "learning_rate": 5.728591160220994e-05, "loss": 0.0, "step": 7030 }, { "epoch": 2.430939226519337, "grad_norm": 1.4080549135542242e-06, "learning_rate": 5.6940607734806626e-05, "loss": 0.0, "step": 7040 }, { "epoch": 2.4343922651933703, "grad_norm": 1.4643102304034983e-06, "learning_rate": 5.6595303867403315e-05, "loss": 0.0, "step": 7050 }, { "epoch": 2.4378453038674035, "grad_norm": 1.3797723568131914e-06, "learning_rate": 5.625e-05, "loss": 0.0, "step": 7060 }, { "epoch": 2.4412983425414363, "grad_norm": 1.3925057373853633e-06, "learning_rate": 5.5904696132596675e-05, "loss": 0.0, "step": 7070 }, { "epoch": 2.4447513812154695, "grad_norm": 1.3189187484385911e-06, "learning_rate": 5.5559392265193365e-05, "loss": 0.0, "step": 7080 }, { "epoch": 2.4482044198895028, "grad_norm": 1.3639839835377643e-06, "learning_rate": 5.521408839779005e-05, "loss": 0.0, "step": 7090 }, { "epoch": 2.451657458563536, "grad_norm": 1.2806161748812883e-06, "learning_rate": 5.486878453038674e-05, "loss": 0.0, "step": 7100 }, { "epoch": 2.4551104972375692, "grad_norm": 1.5013511074357666e-06, "learning_rate": 5.452348066298342e-05, "loss": 0.0, "step": 7110 }, { "epoch": 2.458563535911602, "grad_norm": 1.3570266901297146e-06, "learning_rate": 5.4178176795580104e-05, "loss": 0.0, "step": 7120 }, { "epoch": 2.4620165745856353, "grad_norm": 1.3733230161960819e-06, "learning_rate": 5.3832872928176794e-05, "loss": 0.0, "step": 7130 }, { "epoch": 2.4654696132596685, "grad_norm": 1.3304069170771982e-06, "learning_rate": 5.348756906077348e-05, "loss": 0.0, "step": 7140 }, { "epoch": 2.4689226519337018, "grad_norm": 1.3794581263937289e-06, "learning_rate": 5.314226519337017e-05, "loss": 0.0, "step": 7150 }, { "epoch": 2.472375690607735, "grad_norm": 1.4294989796326263e-06, "learning_rate": 5.279696132596685e-05, "loss": 0.0, "step": 7160 }, { "epoch": 2.475828729281768, "grad_norm": 1.3799055977870012e-06, "learning_rate": 5.2451657458563526e-05, "loss": 0.0, "step": 7170 }, { "epoch": 2.479281767955801, "grad_norm": 1.3544439525503549e-06, "learning_rate": 5.2106353591160216e-05, "loss": 0.0, "step": 7180 }, { "epoch": 2.4827348066298343, "grad_norm": 1.4935747003619326e-06, "learning_rate": 5.17610497237569e-05, "loss": 0.0, "step": 7190 }, { "epoch": 2.4861878453038675, "grad_norm": 1.3350916105991928e-06, "learning_rate": 5.141574585635359e-05, "loss": 0.0, "step": 7200 }, { "epoch": 2.4896408839779007, "grad_norm": 1.3124501947459066e-06, "learning_rate": 5.107044198895027e-05, "loss": 0.0, "step": 7210 }, { "epoch": 2.4930939226519335, "grad_norm": 1.2801463071809849e-06, "learning_rate": 5.0725138121546955e-05, "loss": 0.0, "step": 7220 }, { "epoch": 2.4965469613259668, "grad_norm": 1.2924041357109672e-06, "learning_rate": 5.0379834254143645e-05, "loss": 0.0, "step": 7230 }, { "epoch": 2.5, "grad_norm": 1.3434764696285129e-06, "learning_rate": 5.003453038674033e-05, "loss": 0.0, "step": 7240 }, { "epoch": 2.5034530386740332, "grad_norm": 1.31899514599354e-06, "learning_rate": 4.968922651933701e-05, "loss": 0.0, "step": 7250 }, { "epoch": 2.5069060773480665, "grad_norm": 1.3149669939593878e-06, "learning_rate": 4.93439226519337e-05, "loss": 0.0, "step": 7260 }, { "epoch": 2.5103591160220997, "grad_norm": 1.2788249250661465e-06, "learning_rate": 4.899861878453038e-05, "loss": 0.0, "step": 7270 }, { "epoch": 2.5138121546961325, "grad_norm": 1.2933428479300346e-06, "learning_rate": 4.865331491712707e-05, "loss": 0.0, "step": 7280 }, { "epoch": 2.5172651933701657, "grad_norm": 1.3242823797554593e-06, "learning_rate": 4.830801104972375e-05, "loss": 0.0, "step": 7290 }, { "epoch": 2.520718232044199, "grad_norm": 1.3286679632074083e-06, "learning_rate": 4.796270718232043e-05, "loss": 0.0, "step": 7300 }, { "epoch": 2.5241712707182318, "grad_norm": 1.2753876035276335e-06, "learning_rate": 4.761740331491712e-05, "loss": 0.0, "step": 7310 }, { "epoch": 2.527624309392265, "grad_norm": 1.3197129646869143e-06, "learning_rate": 4.7272099447513806e-05, "loss": 0.0, "step": 7320 }, { "epoch": 2.5310773480662982, "grad_norm": 1.2606105883605778e-06, "learning_rate": 4.6926795580110496e-05, "loss": 0.0, "step": 7330 }, { "epoch": 2.5345303867403315, "grad_norm": 1.4013769487064565e-06, "learning_rate": 4.658149171270718e-05, "loss": 0.0, "step": 7340 }, { "epoch": 2.5379834254143647, "grad_norm": 1.241318955180759e-06, "learning_rate": 4.623618784530386e-05, "loss": 0.0, "step": 7350 }, { "epoch": 2.541436464088398, "grad_norm": 1.2292646260902984e-06, "learning_rate": 4.589088397790055e-05, "loss": 0.0, "step": 7360 }, { "epoch": 2.544889502762431, "grad_norm": 1.3700774843528052e-06, "learning_rate": 4.554558011049723e-05, "loss": 0.0, "step": 7370 }, { "epoch": 2.548342541436464, "grad_norm": 1.3522789004127844e-06, "learning_rate": 4.5200276243093925e-05, "loss": 0.0, "step": 7380 }, { "epoch": 2.5517955801104972, "grad_norm": 1.3294830978338723e-06, "learning_rate": 4.48549723756906e-05, "loss": 0.0, "step": 7390 }, { "epoch": 2.5552486187845305, "grad_norm": 1.2718649031739915e-06, "learning_rate": 4.4509668508287285e-05, "loss": 0.0, "step": 7400 }, { "epoch": 2.5587016574585633, "grad_norm": 1.322502725997765e-06, "learning_rate": 4.4164364640883974e-05, "loss": 0.0, "step": 7410 }, { "epoch": 2.5621546961325965, "grad_norm": 1.2904275763503392e-06, "learning_rate": 4.381906077348066e-05, "loss": 0.0, "step": 7420 }, { "epoch": 2.5656077348066297, "grad_norm": 1.3272258456709096e-06, "learning_rate": 4.347375690607735e-05, "loss": 0.0, "step": 7430 }, { "epoch": 2.569060773480663, "grad_norm": 1.3059714092378272e-06, "learning_rate": 4.312845303867403e-05, "loss": 0.0, "step": 7440 }, { "epoch": 2.572513812154696, "grad_norm": 1.281578875023115e-06, "learning_rate": 4.2783149171270714e-05, "loss": 0.0, "step": 7450 }, { "epoch": 2.5759668508287294, "grad_norm": 1.2490442031776183e-06, "learning_rate": 4.2437845303867403e-05, "loss": 0.0, "step": 7460 }, { "epoch": 2.5794198895027627, "grad_norm": 1.2794454278264311e-06, "learning_rate": 4.2092541436464087e-05, "loss": 0.0, "step": 7470 }, { "epoch": 2.5828729281767955, "grad_norm": 1.3249749599708593e-06, "learning_rate": 4.1747237569060776e-05, "loss": 0.0, "step": 7480 }, { "epoch": 2.5863259668508287, "grad_norm": 1.3326467751539894e-06, "learning_rate": 4.140193370165745e-05, "loss": 0.0, "step": 7490 }, { "epoch": 2.589779005524862, "grad_norm": 1.2537306020021788e-06, "learning_rate": 4.1056629834254136e-05, "loss": 0.0, "step": 7500 }, { "epoch": 2.593232044198895, "grad_norm": 1.3646642855746904e-06, "learning_rate": 4.0711325966850826e-05, "loss": 0.0, "step": 7510 }, { "epoch": 2.596685082872928, "grad_norm": 1.2777338724845322e-06, "learning_rate": 4.036602209944751e-05, "loss": 0.0, "step": 7520 }, { "epoch": 2.6001381215469612, "grad_norm": 1.2863857818956603e-06, "learning_rate": 4.00207182320442e-05, "loss": 0.0, "step": 7530 }, { "epoch": 2.6035911602209945, "grad_norm": 1.3351325378607726e-06, "learning_rate": 3.967541436464088e-05, "loss": 0.0, "step": 7540 }, { "epoch": 2.6070441988950277, "grad_norm": 1.3100014939482207e-06, "learning_rate": 3.9330110497237565e-05, "loss": 0.0, "step": 7550 }, { "epoch": 2.610497237569061, "grad_norm": 1.1667951866911608e-06, "learning_rate": 3.8984806629834255e-05, "loss": 0.0, "step": 7560 }, { "epoch": 2.613950276243094, "grad_norm": 1.314491100856685e-06, "learning_rate": 3.863950276243094e-05, "loss": 0.0, "step": 7570 }, { "epoch": 2.617403314917127, "grad_norm": 1.3197399084674544e-06, "learning_rate": 3.8294198895027614e-05, "loss": 0.0, "step": 7580 }, { "epoch": 2.62085635359116, "grad_norm": 1.2023742783640046e-06, "learning_rate": 3.7948895027624304e-05, "loss": 0.0, "step": 7590 }, { "epoch": 2.6243093922651934, "grad_norm": 1.2869704733020626e-06, "learning_rate": 3.760359116022099e-05, "loss": 0.0, "step": 7600 }, { "epoch": 2.6277624309392267, "grad_norm": 1.2954942576470785e-06, "learning_rate": 3.725828729281768e-05, "loss": 0.0, "step": 7610 }, { "epoch": 2.6312154696132595, "grad_norm": 1.2907723885291489e-06, "learning_rate": 3.691298342541436e-05, "loss": 0.0, "step": 7620 }, { "epoch": 2.6346685082872927, "grad_norm": 1.2202990546938963e-06, "learning_rate": 3.656767955801105e-05, "loss": 0.0, "step": 7630 }, { "epoch": 2.638121546961326, "grad_norm": 1.198839981952915e-06, "learning_rate": 3.622237569060773e-05, "loss": 0.0, "step": 7640 }, { "epoch": 2.641574585635359, "grad_norm": 1.1681810292429873e-06, "learning_rate": 3.5877071823204416e-05, "loss": 0.0, "step": 7650 }, { "epoch": 2.6450276243093924, "grad_norm": 1.233747980222688e-06, "learning_rate": 3.55317679558011e-05, "loss": 0.0, "step": 7660 }, { "epoch": 2.6484806629834257, "grad_norm": 1.1738615057765855e-06, "learning_rate": 3.518646408839779e-05, "loss": 0.0, "step": 7670 }, { "epoch": 2.6519337016574585, "grad_norm": 1.297883045481285e-06, "learning_rate": 3.484116022099447e-05, "loss": 0.0, "step": 7680 }, { "epoch": 2.6553867403314917, "grad_norm": 1.243111796611629e-06, "learning_rate": 3.449585635359116e-05, "loss": 0.0, "step": 7690 }, { "epoch": 2.658839779005525, "grad_norm": 1.3326042562766816e-06, "learning_rate": 3.415055248618784e-05, "loss": 0.0, "step": 7700 }, { "epoch": 2.662292817679558, "grad_norm": 1.304361603615689e-06, "learning_rate": 3.380524861878453e-05, "loss": 0.0, "step": 7710 }, { "epoch": 2.665745856353591, "grad_norm": 1.2983983879166772e-06, "learning_rate": 3.345994475138121e-05, "loss": 0.0, "step": 7720 }, { "epoch": 2.669198895027624, "grad_norm": 1.1631630059127929e-06, "learning_rate": 3.31146408839779e-05, "loss": 0.0, "step": 7730 }, { "epoch": 2.6726519337016574, "grad_norm": 1.270102529815631e-06, "learning_rate": 3.2769337016574584e-05, "loss": 0.0, "step": 7740 }, { "epoch": 2.6761049723756907, "grad_norm": 1.2677431868723943e-06, "learning_rate": 3.242403314917127e-05, "loss": 0.0, "step": 7750 }, { "epoch": 2.679558011049724, "grad_norm": 1.3403529237621115e-06, "learning_rate": 3.207872928176795e-05, "loss": 0.0, "step": 7760 }, { "epoch": 2.683011049723757, "grad_norm": 1.2296259228605777e-06, "learning_rate": 3.173342541436464e-05, "loss": 0.0, "step": 7770 }, { "epoch": 2.68646408839779, "grad_norm": 1.3147638355803792e-06, "learning_rate": 3.138812154696132e-05, "loss": 0.0, "step": 7780 }, { "epoch": 2.689917127071823, "grad_norm": 1.2382265595078934e-06, "learning_rate": 3.104281767955801e-05, "loss": 0.0, "step": 7790 }, { "epoch": 2.6933701657458564, "grad_norm": 1.1940491049244883e-06, "learning_rate": 3.069751381215469e-05, "loss": 0.0, "step": 7800 }, { "epoch": 2.6968232044198897, "grad_norm": 1.2410971521603642e-06, "learning_rate": 3.035220994475138e-05, "loss": 0.0, "step": 7810 }, { "epoch": 2.7002762430939224, "grad_norm": 1.2351674740784802e-06, "learning_rate": 3.0006906077348062e-05, "loss": 0.0, "step": 7820 }, { "epoch": 2.7037292817679557, "grad_norm": 1.2757242302541272e-06, "learning_rate": 2.966160220994475e-05, "loss": 0.0, "step": 7830 }, { "epoch": 2.707182320441989, "grad_norm": 1.2038384511470213e-06, "learning_rate": 2.9316298342541432e-05, "loss": 0.0, "step": 7840 }, { "epoch": 2.710635359116022, "grad_norm": 1.3714710576095968e-06, "learning_rate": 2.897099447513812e-05, "loss": 0.0, "step": 7850 }, { "epoch": 2.7140883977900554, "grad_norm": 1.1232084489165572e-06, "learning_rate": 2.8625690607734805e-05, "loss": 0.0, "step": 7860 }, { "epoch": 2.7175414364640886, "grad_norm": 1.1925824310310418e-06, "learning_rate": 2.828038674033149e-05, "loss": 0.0, "step": 7870 }, { "epoch": 2.7209944751381214, "grad_norm": 1.1417338328101323e-06, "learning_rate": 2.7935082872928175e-05, "loss": 0.0, "step": 7880 }, { "epoch": 2.7244475138121547, "grad_norm": 1.0749527064035647e-06, "learning_rate": 2.7589779005524858e-05, "loss": 0.0, "step": 7890 }, { "epoch": 2.727900552486188, "grad_norm": 1.2691491519944975e-06, "learning_rate": 2.7244475138121544e-05, "loss": 0.0, "step": 7900 }, { "epoch": 2.731353591160221, "grad_norm": 1.16300896024768e-06, "learning_rate": 2.689917127071823e-05, "loss": 0.0, "step": 7910 }, { "epoch": 2.734806629834254, "grad_norm": 1.110578978114063e-06, "learning_rate": 2.6553867403314917e-05, "loss": 0.0, "step": 7920 }, { "epoch": 2.738259668508287, "grad_norm": 1.1780427939811489e-06, "learning_rate": 2.62085635359116e-05, "loss": 0.0, "step": 7930 }, { "epoch": 2.7417127071823204, "grad_norm": 1.1251114528931794e-06, "learning_rate": 2.5863259668508283e-05, "loss": 0.0, "step": 7940 }, { "epoch": 2.7451657458563536, "grad_norm": 1.1284269021416549e-06, "learning_rate": 2.551795580110497e-05, "loss": 0.0, "step": 7950 }, { "epoch": 2.748618784530387, "grad_norm": 1.1818277698694146e-06, "learning_rate": 2.5172651933701656e-05, "loss": 0.0, "step": 7960 }, { "epoch": 2.75207182320442, "grad_norm": 1.2069627928212867e-06, "learning_rate": 2.4827348066298343e-05, "loss": 0.0, "step": 7970 }, { "epoch": 2.755524861878453, "grad_norm": 1.1397900152587681e-06, "learning_rate": 2.4482044198895026e-05, "loss": 0.0, "step": 7980 }, { "epoch": 2.758977900552486, "grad_norm": 1.0643935866028187e-06, "learning_rate": 2.413674033149171e-05, "loss": 0.0, "step": 7990 }, { "epoch": 2.7624309392265194, "grad_norm": 1.138638026532135e-06, "learning_rate": 2.3791436464088395e-05, "loss": 0.0, "step": 8000 }, { "epoch": 2.7658839779005526, "grad_norm": 1.1600459401961416e-06, "learning_rate": 2.3446132596685082e-05, "loss": 0.0, "step": 8010 }, { "epoch": 2.7693370165745854, "grad_norm": 1.2335358405834995e-06, "learning_rate": 2.310082872928177e-05, "loss": 0.0, "step": 8020 }, { "epoch": 2.7727900552486187, "grad_norm": 1.047859768732451e-06, "learning_rate": 2.2755524861878448e-05, "loss": 0.0, "step": 8030 }, { "epoch": 2.776243093922652, "grad_norm": 1.1262660564170801e-06, "learning_rate": 2.2410220994475135e-05, "loss": 0.0, "step": 8040 }, { "epoch": 2.779696132596685, "grad_norm": 1.130129817283887e-06, "learning_rate": 2.206491712707182e-05, "loss": 0.0, "step": 8050 }, { "epoch": 2.7831491712707184, "grad_norm": 1.3053880820734776e-06, "learning_rate": 2.1719613259668508e-05, "loss": 0.0, "step": 8060 }, { "epoch": 2.7866022099447516, "grad_norm": 1.0777407624118496e-06, "learning_rate": 2.1374309392265194e-05, "loss": 0.0, "step": 8070 }, { "epoch": 2.7900552486187844, "grad_norm": 1.0464850674907211e-06, "learning_rate": 2.1029005524861874e-05, "loss": 0.0, "step": 8080 }, { "epoch": 2.7935082872928176, "grad_norm": 1.189749013974506e-06, "learning_rate": 2.068370165745856e-05, "loss": 0.0, "step": 8090 }, { "epoch": 2.796961325966851, "grad_norm": 1.2039630519211642e-06, "learning_rate": 2.0338397790055247e-05, "loss": 0.0, "step": 8100 }, { "epoch": 2.800414364640884, "grad_norm": 1.1546870837264578e-06, "learning_rate": 1.9993093922651933e-05, "loss": 0.0, "step": 8110 }, { "epoch": 2.803867403314917, "grad_norm": 1.0364394711359637e-06, "learning_rate": 1.964779005524862e-05, "loss": 0.0, "step": 8120 }, { "epoch": 2.80732044198895, "grad_norm": 1.1577117220440414e-06, "learning_rate": 1.93024861878453e-05, "loss": 0.0, "step": 8130 }, { "epoch": 2.8107734806629834, "grad_norm": 1.2328664524829946e-06, "learning_rate": 1.8957182320441986e-05, "loss": 0.0, "step": 8140 }, { "epoch": 2.8142265193370166, "grad_norm": 1.147184661931533e-06, "learning_rate": 1.8611878453038672e-05, "loss": 0.0, "step": 8150 }, { "epoch": 2.81767955801105, "grad_norm": 1.0496038385099382e-06, "learning_rate": 1.8266574585635355e-05, "loss": 0.0, "step": 8160 }, { "epoch": 2.821132596685083, "grad_norm": 1.1424755257394281e-06, "learning_rate": 1.7921270718232042e-05, "loss": 0.0, "step": 8170 }, { "epoch": 2.824585635359116, "grad_norm": 1.1343421419951483e-06, "learning_rate": 1.757596685082873e-05, "loss": 0.0, "step": 8180 }, { "epoch": 2.828038674033149, "grad_norm": 1.070453436113894e-06, "learning_rate": 1.723066298342541e-05, "loss": 0.0, "step": 8190 }, { "epoch": 2.8314917127071824, "grad_norm": 1.156422172243765e-06, "learning_rate": 1.6885359116022098e-05, "loss": 0.0, "step": 8200 }, { "epoch": 2.8349447513812156, "grad_norm": 1.114323367801262e-06, "learning_rate": 1.654005524861878e-05, "loss": 0.0, "step": 8210 }, { "epoch": 2.8383977900552484, "grad_norm": 1.0582352842902765e-06, "learning_rate": 1.6194751381215467e-05, "loss": 0.0, "step": 8220 }, { "epoch": 2.8418508287292816, "grad_norm": 1.1867974762935773e-06, "learning_rate": 1.5849447513812154e-05, "loss": 0.0, "step": 8230 }, { "epoch": 2.845303867403315, "grad_norm": 1.1317941925881314e-06, "learning_rate": 1.5504143646408837e-05, "loss": 0.0, "step": 8240 }, { "epoch": 2.848756906077348, "grad_norm": 1.0743899565568427e-06, "learning_rate": 1.5158839779005525e-05, "loss": 0.0, "step": 8250 }, { "epoch": 2.8522099447513813, "grad_norm": 1.1211558330614935e-06, "learning_rate": 1.4813535911602208e-05, "loss": 0.0, "step": 8260 }, { "epoch": 2.8556629834254146, "grad_norm": 1.1120091585326008e-06, "learning_rate": 1.4468232044198895e-05, "loss": 0.0, "step": 8270 }, { "epoch": 2.8591160220994474, "grad_norm": 1.0728784900493338e-06, "learning_rate": 1.4122928176795578e-05, "loss": 0.0, "step": 8280 }, { "epoch": 2.8625690607734806, "grad_norm": 1.0785619224407128e-06, "learning_rate": 1.3777624309392264e-05, "loss": 0.0, "step": 8290 }, { "epoch": 2.866022099447514, "grad_norm": 1.0750453611763078e-06, "learning_rate": 1.3432320441988947e-05, "loss": 0.0, "step": 8300 }, { "epoch": 2.869475138121547, "grad_norm": 1.2003378060398973e-06, "learning_rate": 1.3087016574585634e-05, "loss": 0.0, "step": 8310 }, { "epoch": 2.87292817679558, "grad_norm": 1.175400484498823e-06, "learning_rate": 1.274171270718232e-05, "loss": 0.0, "step": 8320 }, { "epoch": 2.876381215469613, "grad_norm": 1.1778462294387282e-06, "learning_rate": 1.2396408839779003e-05, "loss": 0.0, "step": 8330 }, { "epoch": 2.8798342541436464, "grad_norm": 1.1246154372202e-06, "learning_rate": 1.205110497237569e-05, "loss": 0.0, "step": 8340 }, { "epoch": 2.8832872928176796, "grad_norm": 1.119144712902198e-06, "learning_rate": 1.1705801104972375e-05, "loss": 0.0, "step": 8350 }, { "epoch": 2.886740331491713, "grad_norm": 1.1328623941153637e-06, "learning_rate": 1.136049723756906e-05, "loss": 0.0, "step": 8360 }, { "epoch": 2.890193370165746, "grad_norm": 1.2495584087446332e-06, "learning_rate": 1.1015193370165746e-05, "loss": 0.0, "step": 8370 }, { "epoch": 2.893646408839779, "grad_norm": 1.3356406043385505e-06, "learning_rate": 1.0669889502762429e-05, "loss": 0.0, "step": 8380 }, { "epoch": 2.897099447513812, "grad_norm": 1.2539668432509643e-06, "learning_rate": 1.0324585635359116e-05, "loss": 0.0, "step": 8390 }, { "epoch": 2.9005524861878453, "grad_norm": 1.1577500345083536e-06, "learning_rate": 9.9792817679558e-06, "loss": 0.0, "step": 8400 }, { "epoch": 2.9040055248618786, "grad_norm": 1.0928521305686445e-06, "learning_rate": 9.633977900552485e-06, "loss": 0.0, "step": 8410 }, { "epoch": 2.9074585635359114, "grad_norm": 1.1646479833871126e-06, "learning_rate": 9.28867403314917e-06, "loss": 0.0, "step": 8420 }, { "epoch": 2.9109116022099446, "grad_norm": 1.118172804126516e-06, "learning_rate": 8.943370165745856e-06, "loss": 0.0, "step": 8430 }, { "epoch": 2.914364640883978, "grad_norm": 1.0813453172886511e-06, "learning_rate": 8.598066298342541e-06, "loss": 0.0, "step": 8440 }, { "epoch": 2.917817679558011, "grad_norm": 1.124542336583545e-06, "learning_rate": 8.252762430939226e-06, "loss": 0.0, "step": 8450 }, { "epoch": 2.9212707182320443, "grad_norm": 1.095955212804256e-06, "learning_rate": 7.90745856353591e-06, "loss": 0.0, "step": 8460 }, { "epoch": 2.9247237569060776, "grad_norm": 1.3943418935014051e-06, "learning_rate": 7.562154696132596e-06, "loss": 0.0, "step": 8470 }, { "epoch": 2.9281767955801103, "grad_norm": 1.1460970199550502e-06, "learning_rate": 7.216850828729281e-06, "loss": 0.0, "step": 8480 }, { "epoch": 2.9316298342541436, "grad_norm": 1.1148181329190265e-06, "learning_rate": 6.871546961325966e-06, "loss": 0.0, "step": 8490 }, { "epoch": 2.935082872928177, "grad_norm": 1.1865397482324624e-06, "learning_rate": 6.526243093922652e-06, "loss": 0.0, "step": 8500 }, { "epoch": 2.93853591160221, "grad_norm": 1.1013279390681419e-06, "learning_rate": 6.180939226519336e-06, "loss": 0.0, "step": 8510 }, { "epoch": 2.941988950276243, "grad_norm": 1.204652448905108e-06, "learning_rate": 5.835635359116021e-06, "loss": 0.0, "step": 8520 }, { "epoch": 2.945441988950276, "grad_norm": 1.082311200661934e-06, "learning_rate": 5.490331491712706e-06, "loss": 0.0, "step": 8530 }, { "epoch": 2.9488950276243093, "grad_norm": 1.1705669749062508e-06, "learning_rate": 5.1450276243093925e-06, "loss": 0.0, "step": 8540 }, { "epoch": 2.9523480662983426, "grad_norm": 1.1972175570917898e-06, "learning_rate": 4.799723756906077e-06, "loss": 0.0, "step": 8550 }, { "epoch": 2.955801104972376, "grad_norm": 1.2771783985954244e-06, "learning_rate": 4.454419889502762e-06, "loss": 0.0, "step": 8560 }, { "epoch": 2.959254143646409, "grad_norm": 1.096428491109691e-06, "learning_rate": 4.109116022099447e-06, "loss": 0.0, "step": 8570 }, { "epoch": 2.962707182320442, "grad_norm": 1.0665889931260608e-06, "learning_rate": 3.763812154696132e-06, "loss": 0.0, "step": 8580 }, { "epoch": 2.966160220994475, "grad_norm": 1.147935677181522e-06, "learning_rate": 3.4185082872928177e-06, "loss": 0.0, "step": 8590 }, { "epoch": 2.9696132596685083, "grad_norm": 1.0836464525709744e-06, "learning_rate": 3.0732044198895024e-06, "loss": 0.0, "step": 8600 }, { "epoch": 2.9730662983425415, "grad_norm": 1.1260289056735928e-06, "learning_rate": 2.7279005524861872e-06, "loss": 0.0, "step": 8610 }, { "epoch": 2.9765193370165743, "grad_norm": 1.1113207847301965e-06, "learning_rate": 2.382596685082873e-06, "loss": 0.0, "step": 8620 }, { "epoch": 2.9799723756906076, "grad_norm": 1.041067548612773e-06, "learning_rate": 2.0372928176795576e-06, "loss": 0.0, "step": 8630 }, { "epoch": 2.983425414364641, "grad_norm": 1.0935708587567206e-06, "learning_rate": 1.691988950276243e-06, "loss": 0.0, "step": 8640 }, { "epoch": 2.986878453038674, "grad_norm": 1.112028712668689e-06, "learning_rate": 1.346685082872928e-06, "loss": 0.0, "step": 8650 }, { "epoch": 2.9903314917127073, "grad_norm": 1.2088406720067724e-06, "learning_rate": 1.0013812154696133e-06, "loss": 0.0, "step": 8660 }, { "epoch": 2.9937845303867405, "grad_norm": 1.064264438355167e-06, "learning_rate": 6.560773480662983e-07, "loss": 0.0, "step": 8670 }, { "epoch": 2.9972375690607733, "grad_norm": 1.2643539548662375e-06, "learning_rate": 3.107734806629834e-07, "loss": 0.0, "step": 8680 }, { "epoch": 3.0, "eval_loss": 3.460914399511239e-08, "eval_runtime": 148.9873, "eval_samples_per_second": 155.496, "eval_steps_per_second": 4.859, "step": 8688 } ], "logging_steps": 10, "max_steps": 8688, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4851084604040806e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }