{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8564866149215904, "eval_steps": 500, "global_step": 58600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003168065895770632, "grad_norm": 0.0008460358949378133, "learning_rate": 6.335128286347799e-07, "loss": 0.0007, "step": 100 }, { "epoch": 0.006336131791541264, "grad_norm": 0.006260558497160673, "learning_rate": 1.2670256572695599e-06, "loss": 0.0004, "step": 200 }, { "epoch": 0.009504197687311896, "grad_norm": 0.0002755998866632581, "learning_rate": 1.9005384859043396e-06, "loss": 0.0027, "step": 300 }, { "epoch": 0.012672263583082528, "grad_norm": 0.004525843542069197, "learning_rate": 2.5340513145391198e-06, "loss": 0.003, "step": 400 }, { "epoch": 0.01584032947885316, "grad_norm": 0.009563793428242207, "learning_rate": 3.1675641431738997e-06, "loss": 0.0002, "step": 500 }, { "epoch": 0.01900839537462379, "grad_norm": 2.0679131921497174e-05, "learning_rate": 3.801076971808679e-06, "loss": 0.0025, "step": 600 }, { "epoch": 0.022176461270394424, "grad_norm": 3.369219848536886e-05, "learning_rate": 4.434589800443459e-06, "loss": 0.0007, "step": 700 }, { "epoch": 0.025344527166165056, "grad_norm": 0.03423422574996948, "learning_rate": 5.0681026290782395e-06, "loss": 0.0028, "step": 800 }, { "epoch": 0.02851259306193569, "grad_norm": 0.028383228927850723, "learning_rate": 5.701615457713019e-06, "loss": 0.001, "step": 900 }, { "epoch": 0.03168065895770632, "grad_norm": 8.241041359724477e-05, "learning_rate": 6.335128286347799e-06, "loss": 0.001, "step": 1000 }, { "epoch": 0.03484872485347695, "grad_norm": 0.015536214224994183, "learning_rate": 6.96864111498258e-06, "loss": 0.0024, "step": 1100 }, { "epoch": 0.03801679074924758, "grad_norm": 0.0755501538515091, "learning_rate": 7.602153943617358e-06, "loss": 0.0006, "step": 1200 }, { "epoch": 0.04118485664501822, "grad_norm": 0.011010600253939629, "learning_rate": 8.235666772252139e-06, "loss": 0.0056, "step": 1300 }, { "epoch": 0.04435292254078885, "grad_norm": 4.7160243411781266e-05, "learning_rate": 8.869179600886918e-06, "loss": 0.0037, "step": 1400 }, { "epoch": 0.04752098843655948, "grad_norm": 0.04266763851046562, "learning_rate": 9.502692429521698e-06, "loss": 0.0026, "step": 1500 }, { "epoch": 0.05068905433233011, "grad_norm": 0.03944120928645134, "learning_rate": 1.0136205258156479e-05, "loss": 0.0012, "step": 1600 }, { "epoch": 0.05385712022810074, "grad_norm": 0.0018407816532999277, "learning_rate": 1.0769718086791259e-05, "loss": 0.0005, "step": 1700 }, { "epoch": 0.05702518612387138, "grad_norm": 0.0038744837511330843, "learning_rate": 1.1403230915426038e-05, "loss": 0.0003, "step": 1800 }, { "epoch": 0.060193252019642006, "grad_norm": 0.002657032571732998, "learning_rate": 1.203674374406082e-05, "loss": 0.0006, "step": 1900 }, { "epoch": 0.06336131791541263, "grad_norm": 0.001354015665128827, "learning_rate": 1.2670256572695599e-05, "loss": 0.0004, "step": 2000 }, { "epoch": 0.06652938381118327, "grad_norm": 0.03860320523381233, "learning_rate": 1.3303769401330378e-05, "loss": 0.0034, "step": 2100 }, { "epoch": 0.0696974497069539, "grad_norm": 0.004643497057259083, "learning_rate": 1.393728222996516e-05, "loss": 0.0015, "step": 2200 }, { "epoch": 0.07286551560272454, "grad_norm": 0.0005492149502970278, "learning_rate": 1.4570795058599937e-05, "loss": 0.0011, "step": 2300 }, { "epoch": 0.07603358149849516, "grad_norm": 0.005505191162228584, "learning_rate": 1.5204307887234717e-05, "loss": 0.0006, "step": 2400 }, { "epoch": 0.0792016473942658, "grad_norm": 0.00082977837882936, "learning_rate": 1.58378207158695e-05, "loss": 0.0015, "step": 2500 }, { "epoch": 0.08236971329003644, "grad_norm": 6.766305159544572e-05, "learning_rate": 1.6471333544504278e-05, "loss": 0.0025, "step": 2600 }, { "epoch": 0.08553777918580706, "grad_norm": 0.06128118559718132, "learning_rate": 1.7104846373139055e-05, "loss": 0.0006, "step": 2700 }, { "epoch": 0.0887058450815777, "grad_norm": 0.43053677678108215, "learning_rate": 1.7738359201773837e-05, "loss": 0.0007, "step": 2800 }, { "epoch": 0.09187391097734833, "grad_norm": 0.00012383765715640038, "learning_rate": 1.8371872030408618e-05, "loss": 0.0011, "step": 2900 }, { "epoch": 0.09504197687311897, "grad_norm": 3.1947878596838564e-05, "learning_rate": 1.9005384859043396e-05, "loss": 0.0009, "step": 3000 }, { "epoch": 0.09821004276888959, "grad_norm": 0.0004990168381482363, "learning_rate": 1.9638897687678177e-05, "loss": 0.0019, "step": 3100 }, { "epoch": 0.10137810866466022, "grad_norm": 0.003429220989346504, "learning_rate": 1.998566021376286e-05, "loss": 0.0036, "step": 3200 }, { "epoch": 0.10454617456043086, "grad_norm": 0.00011575384996831417, "learning_rate": 1.995231187367649e-05, "loss": 0.0047, "step": 3300 }, { "epoch": 0.10771424045620148, "grad_norm": 0.001659040222875774, "learning_rate": 1.9918963533590115e-05, "loss": 0.0044, "step": 3400 }, { "epoch": 0.11088230635197212, "grad_norm": 0.00041119763045571744, "learning_rate": 1.9885615193503744e-05, "loss": 0.0018, "step": 3500 }, { "epoch": 0.11405037224774275, "grad_norm": 0.003299827454611659, "learning_rate": 1.9852266853417373e-05, "loss": 0.001, "step": 3600 }, { "epoch": 0.11721843814351339, "grad_norm": 8.044855348998681e-05, "learning_rate": 1.9818918513331e-05, "loss": 0.0027, "step": 3700 }, { "epoch": 0.12038650403928401, "grad_norm": 0.00031627726275473833, "learning_rate": 1.9785570173244628e-05, "loss": 0.0061, "step": 3800 }, { "epoch": 0.12355456993505465, "grad_norm": 0.001032638712786138, "learning_rate": 1.9752221833158257e-05, "loss": 0.001, "step": 3900 }, { "epoch": 0.12672263583082527, "grad_norm": 0.0009450612124055624, "learning_rate": 1.9718873493071883e-05, "loss": 0.0074, "step": 4000 }, { "epoch": 0.12989070172659592, "grad_norm": 0.6000776290893555, "learning_rate": 1.9685525152985512e-05, "loss": 0.0061, "step": 4100 }, { "epoch": 0.13305876762236654, "grad_norm": 0.1603873372077942, "learning_rate": 1.965217681289914e-05, "loss": 0.0015, "step": 4200 }, { "epoch": 0.13622683351813716, "grad_norm": 0.0001428043033229187, "learning_rate": 1.9618828472812767e-05, "loss": 0.0024, "step": 4300 }, { "epoch": 0.1393948994139078, "grad_norm": 0.042562585324048996, "learning_rate": 1.9585480132726396e-05, "loss": 0.0051, "step": 4400 }, { "epoch": 0.14256296530967844, "grad_norm": 0.2654358744621277, "learning_rate": 1.9552131792640022e-05, "loss": 0.0036, "step": 4500 }, { "epoch": 0.14573103120544909, "grad_norm": 0.0015032750088721514, "learning_rate": 1.951878345255365e-05, "loss": 0.0004, "step": 4600 }, { "epoch": 0.1488990971012197, "grad_norm": 9.152581333182752e-05, "learning_rate": 1.948543511246728e-05, "loss": 0.0059, "step": 4700 }, { "epoch": 0.15206716299699033, "grad_norm": 0.013867395929992199, "learning_rate": 1.9452086772380906e-05, "loss": 0.0009, "step": 4800 }, { "epoch": 0.15523522889276098, "grad_norm": 0.018314680084586143, "learning_rate": 1.9418738432294535e-05, "loss": 0.001, "step": 4900 }, { "epoch": 0.1584032947885316, "grad_norm": 0.17640839517116547, "learning_rate": 1.9385390092208164e-05, "loss": 0.0008, "step": 5000 }, { "epoch": 0.16157136068430222, "grad_norm": 0.051024582237005234, "learning_rate": 1.935204175212179e-05, "loss": 0.0011, "step": 5100 }, { "epoch": 0.16473942658007287, "grad_norm": 0.003037165617570281, "learning_rate": 1.931869341203542e-05, "loss": 0.0009, "step": 5200 }, { "epoch": 0.1679074924758435, "grad_norm": 0.011393848806619644, "learning_rate": 1.9285345071949045e-05, "loss": 0.0087, "step": 5300 }, { "epoch": 0.17107555837161412, "grad_norm": 0.0004570337769109756, "learning_rate": 1.925199673186267e-05, "loss": 0.0018, "step": 5400 }, { "epoch": 0.17424362426738477, "grad_norm": 4.8830272135091946e-05, "learning_rate": 1.92186483917763e-05, "loss": 0.0071, "step": 5500 }, { "epoch": 0.1774116901631554, "grad_norm": 0.3802063763141632, "learning_rate": 1.918530005168993e-05, "loss": 0.0021, "step": 5600 }, { "epoch": 0.18057975605892604, "grad_norm": 0.067596435546875, "learning_rate": 1.9151951711603555e-05, "loss": 0.0009, "step": 5700 }, { "epoch": 0.18374782195469666, "grad_norm": 0.0005965412128716707, "learning_rate": 1.9118603371517184e-05, "loss": 0.001, "step": 5800 }, { "epoch": 0.18691588785046728, "grad_norm": 0.13070069253444672, "learning_rate": 1.9085255031430813e-05, "loss": 0.0005, "step": 5900 }, { "epoch": 0.19008395374623793, "grad_norm": 0.020468149334192276, "learning_rate": 1.905190669134444e-05, "loss": 0.006, "step": 6000 }, { "epoch": 0.19325201964200855, "grad_norm": 0.026921125128865242, "learning_rate": 1.9018558351258068e-05, "loss": 0.004, "step": 6100 }, { "epoch": 0.19642008553777918, "grad_norm": 0.025172384455800056, "learning_rate": 1.8985210011171697e-05, "loss": 0.0007, "step": 6200 }, { "epoch": 0.19958815143354983, "grad_norm": 0.0012900714064016938, "learning_rate": 1.8951861671085323e-05, "loss": 0.0005, "step": 6300 }, { "epoch": 0.20275621732932045, "grad_norm": 0.007845859974622726, "learning_rate": 1.8918513330998952e-05, "loss": 0.0012, "step": 6400 }, { "epoch": 0.20592428322509107, "grad_norm": 0.22305609285831451, "learning_rate": 1.8885164990912578e-05, "loss": 0.0008, "step": 6500 }, { "epoch": 0.20909234912086172, "grad_norm": 0.0017252659890800714, "learning_rate": 1.8851816650826207e-05, "loss": 0.0151, "step": 6600 }, { "epoch": 0.21226041501663234, "grad_norm": 0.03503908962011337, "learning_rate": 1.8818468310739836e-05, "loss": 0.0007, "step": 6700 }, { "epoch": 0.21542848091240296, "grad_norm": 0.00017412351735401899, "learning_rate": 1.878511997065346e-05, "loss": 0.001, "step": 6800 }, { "epoch": 0.21859654680817361, "grad_norm": 0.25898587703704834, "learning_rate": 1.875177163056709e-05, "loss": 0.001, "step": 6900 }, { "epoch": 0.22176461270394424, "grad_norm": 0.00022779431310482323, "learning_rate": 1.871842329048072e-05, "loss": 0.0022, "step": 7000 }, { "epoch": 0.22493267859971489, "grad_norm": 0.0009634292218834162, "learning_rate": 1.8685074950394346e-05, "loss": 0.0017, "step": 7100 }, { "epoch": 0.2281007444954855, "grad_norm": 0.0021817036904394627, "learning_rate": 1.8651726610307975e-05, "loss": 0.0026, "step": 7200 }, { "epoch": 0.23126881039125613, "grad_norm": 0.01804823987185955, "learning_rate": 1.86183782702216e-05, "loss": 0.0012, "step": 7300 }, { "epoch": 0.23443687628702678, "grad_norm": 0.022183051332831383, "learning_rate": 1.858502993013523e-05, "loss": 0.0029, "step": 7400 }, { "epoch": 0.2376049421827974, "grad_norm": 0.0007926349644549191, "learning_rate": 1.855168159004886e-05, "loss": 0.0006, "step": 7500 }, { "epoch": 0.24077300807856802, "grad_norm": 0.0001486311521148309, "learning_rate": 1.8518333249962484e-05, "loss": 0.0019, "step": 7600 }, { "epoch": 0.24394107397433867, "grad_norm": 0.11593101173639297, "learning_rate": 1.848498490987611e-05, "loss": 0.0049, "step": 7700 }, { "epoch": 0.2471091398701093, "grad_norm": 0.06385669112205505, "learning_rate": 1.845163656978974e-05, "loss": 0.0018, "step": 7800 }, { "epoch": 0.2502772057658799, "grad_norm": 0.001922784373164177, "learning_rate": 1.841828822970337e-05, "loss": 0.0045, "step": 7900 }, { "epoch": 0.25344527166165054, "grad_norm": 0.0033132501412183046, "learning_rate": 1.8384939889616994e-05, "loss": 0.002, "step": 8000 }, { "epoch": 0.2566133375574212, "grad_norm": 0.0015832999488338828, "learning_rate": 1.8351591549530623e-05, "loss": 0.0009, "step": 8100 }, { "epoch": 0.25978140345319184, "grad_norm": 0.00015198950131889433, "learning_rate": 1.8318243209444252e-05, "loss": 0.0103, "step": 8200 }, { "epoch": 0.26294946934896246, "grad_norm": 0.006131887435913086, "learning_rate": 1.8284894869357878e-05, "loss": 0.0048, "step": 8300 }, { "epoch": 0.2661175352447331, "grad_norm": 0.005711342208087444, "learning_rate": 1.8251546529271507e-05, "loss": 0.0013, "step": 8400 }, { "epoch": 0.2692856011405037, "grad_norm": 0.08911605924367905, "learning_rate": 1.8218198189185133e-05, "loss": 0.0016, "step": 8500 }, { "epoch": 0.2724536670362743, "grad_norm": 0.049607861787080765, "learning_rate": 1.8184849849098762e-05, "loss": 0.0071, "step": 8600 }, { "epoch": 0.275621732932045, "grad_norm": 0.003345210338011384, "learning_rate": 1.815150150901239e-05, "loss": 0.0009, "step": 8700 }, { "epoch": 0.2787897988278156, "grad_norm": 0.002960205776616931, "learning_rate": 1.8118153168926017e-05, "loss": 0.0033, "step": 8800 }, { "epoch": 0.28195786472358625, "grad_norm": 0.007276841904968023, "learning_rate": 1.8084804828839646e-05, "loss": 0.0005, "step": 8900 }, { "epoch": 0.28512593061935687, "grad_norm": 0.0012576623121276498, "learning_rate": 1.8051456488753275e-05, "loss": 0.0011, "step": 9000 }, { "epoch": 0.2882939965151275, "grad_norm": 0.001386396586894989, "learning_rate": 1.80181081486669e-05, "loss": 0.0005, "step": 9100 }, { "epoch": 0.29146206241089817, "grad_norm": 0.0024564603809267282, "learning_rate": 1.798475980858053e-05, "loss": 0.004, "step": 9200 }, { "epoch": 0.2946301283066688, "grad_norm": 0.00010097989434143528, "learning_rate": 1.7951411468494156e-05, "loss": 0.001, "step": 9300 }, { "epoch": 0.2977981942024394, "grad_norm": 0.0008612315286882222, "learning_rate": 1.7918063128407785e-05, "loss": 0.0028, "step": 9400 }, { "epoch": 0.30096626009821004, "grad_norm": 0.09617114812135696, "learning_rate": 1.7884714788321414e-05, "loss": 0.0011, "step": 9500 }, { "epoch": 0.30413432599398066, "grad_norm": 0.002843310823664069, "learning_rate": 1.785136644823504e-05, "loss": 0.0011, "step": 9600 }, { "epoch": 0.3073023918897513, "grad_norm": 0.03222779557108879, "learning_rate": 1.781801810814867e-05, "loss": 0.0013, "step": 9700 }, { "epoch": 0.31047045778552196, "grad_norm": 0.002599438652396202, "learning_rate": 1.7784669768062298e-05, "loss": 0.0009, "step": 9800 }, { "epoch": 0.3136385236812926, "grad_norm": 9.065222002391238e-06, "learning_rate": 1.7751321427975924e-05, "loss": 0.0141, "step": 9900 }, { "epoch": 0.3168065895770632, "grad_norm": 0.136834517121315, "learning_rate": 1.7717973087889553e-05, "loss": 0.0038, "step": 10000 }, { "epoch": 0.3199746554728338, "grad_norm": 0.004138951655477285, "learning_rate": 1.768462474780318e-05, "loss": 0.0026, "step": 10100 }, { "epoch": 0.32314272136860445, "grad_norm": 7.845780783100054e-05, "learning_rate": 1.7651276407716808e-05, "loss": 0.0108, "step": 10200 }, { "epoch": 0.3263107872643751, "grad_norm": 0.00233863340690732, "learning_rate": 1.7617928067630434e-05, "loss": 0.0032, "step": 10300 }, { "epoch": 0.32947885316014575, "grad_norm": 5.531415808945894e-05, "learning_rate": 1.7584579727544063e-05, "loss": 0.0012, "step": 10400 }, { "epoch": 0.33264691905591637, "grad_norm": 0.002138437470421195, "learning_rate": 1.755123138745769e-05, "loss": 0.0011, "step": 10500 }, { "epoch": 0.335814984951687, "grad_norm": 0.007583692204207182, "learning_rate": 1.7517883047371318e-05, "loss": 0.0025, "step": 10600 }, { "epoch": 0.3389830508474576, "grad_norm": 0.0399901457130909, "learning_rate": 1.7484534707284947e-05, "loss": 0.0026, "step": 10700 }, { "epoch": 0.34215111674322823, "grad_norm": 0.10378504544496536, "learning_rate": 1.7451186367198573e-05, "loss": 0.0027, "step": 10800 }, { "epoch": 0.3453191826389989, "grad_norm": 0.002780862618237734, "learning_rate": 1.74178380271122e-05, "loss": 0.0006, "step": 10900 }, { "epoch": 0.34848724853476953, "grad_norm": 0.3812588155269623, "learning_rate": 1.738448968702583e-05, "loss": 0.0041, "step": 11000 }, { "epoch": 0.35165531443054016, "grad_norm": 2.2577160052605905e-05, "learning_rate": 1.7351141346939457e-05, "loss": 0.0012, "step": 11100 }, { "epoch": 0.3548233803263108, "grad_norm": 0.0003742675471585244, "learning_rate": 1.7317793006853086e-05, "loss": 0.0036, "step": 11200 }, { "epoch": 0.3579914462220814, "grad_norm": 0.0007130543817766011, "learning_rate": 1.7284444666766715e-05, "loss": 0.0009, "step": 11300 }, { "epoch": 0.3611595121178521, "grad_norm": 0.022633198648691177, "learning_rate": 1.725109632668034e-05, "loss": 0.0013, "step": 11400 }, { "epoch": 0.3643275780136227, "grad_norm": 0.0017543600406497717, "learning_rate": 1.721774798659397e-05, "loss": 0.0045, "step": 11500 }, { "epoch": 0.3674956439093933, "grad_norm": 2.876186408684589e-05, "learning_rate": 1.7184399646507595e-05, "loss": 0.0009, "step": 11600 }, { "epoch": 0.37066370980516394, "grad_norm": 0.02501189149916172, "learning_rate": 1.7151051306421225e-05, "loss": 0.0031, "step": 11700 }, { "epoch": 0.37383177570093457, "grad_norm": 0.0001932688319357112, "learning_rate": 1.7117702966334854e-05, "loss": 0.0025, "step": 11800 }, { "epoch": 0.3769998415967052, "grad_norm": 0.006397546734660864, "learning_rate": 1.708435462624848e-05, "loss": 0.0028, "step": 11900 }, { "epoch": 0.38016790749247587, "grad_norm": 0.29939302802085876, "learning_rate": 1.705100628616211e-05, "loss": 0.0006, "step": 12000 }, { "epoch": 0.3833359733882465, "grad_norm": 0.00018301274394616485, "learning_rate": 1.7017657946075738e-05, "loss": 0.0009, "step": 12100 }, { "epoch": 0.3865040392840171, "grad_norm": 0.16564935445785522, "learning_rate": 1.6984309605989363e-05, "loss": 0.0073, "step": 12200 }, { "epoch": 0.38967210517978773, "grad_norm": 0.012698143720626831, "learning_rate": 1.6950961265902993e-05, "loss": 0.0045, "step": 12300 }, { "epoch": 0.39284017107555835, "grad_norm": 0.0014803281519562006, "learning_rate": 1.691761292581662e-05, "loss": 0.0003, "step": 12400 }, { "epoch": 0.39600823697132903, "grad_norm": 0.009712688624858856, "learning_rate": 1.6884264585730244e-05, "loss": 0.0009, "step": 12500 }, { "epoch": 0.39917630286709965, "grad_norm": 0.0013824573252350092, "learning_rate": 1.6850916245643873e-05, "loss": 0.0003, "step": 12600 }, { "epoch": 0.4023443687628703, "grad_norm": 0.23537451028823853, "learning_rate": 1.6817567905557502e-05, "loss": 0.001, "step": 12700 }, { "epoch": 0.4055124346586409, "grad_norm": 0.031864460557699203, "learning_rate": 1.6784219565471128e-05, "loss": 0.0042, "step": 12800 }, { "epoch": 0.4086805005544115, "grad_norm": 0.004931151866912842, "learning_rate": 1.6750871225384757e-05, "loss": 0.0098, "step": 12900 }, { "epoch": 0.41184856645018214, "grad_norm": 0.11204478144645691, "learning_rate": 1.6717522885298386e-05, "loss": 0.0014, "step": 13000 }, { "epoch": 0.4150166323459528, "grad_norm": 0.002025209367275238, "learning_rate": 1.6684174545212012e-05, "loss": 0.0006, "step": 13100 }, { "epoch": 0.41818469824172344, "grad_norm": 0.02806149795651436, "learning_rate": 1.665082620512564e-05, "loss": 0.0013, "step": 13200 }, { "epoch": 0.42135276413749406, "grad_norm": 0.003418268170207739, "learning_rate": 1.661747786503927e-05, "loss": 0.001, "step": 13300 }, { "epoch": 0.4245208300332647, "grad_norm": 0.00011387121776351705, "learning_rate": 1.6584129524952896e-05, "loss": 0.0012, "step": 13400 }, { "epoch": 0.4276888959290353, "grad_norm": 0.008205407299101353, "learning_rate": 1.6550781184866525e-05, "loss": 0.001, "step": 13500 }, { "epoch": 0.43085696182480593, "grad_norm": 0.06980779021978378, "learning_rate": 1.651743284478015e-05, "loss": 0.0011, "step": 13600 }, { "epoch": 0.4340250277205766, "grad_norm": 0.027860773727297783, "learning_rate": 1.648408450469378e-05, "loss": 0.0015, "step": 13700 }, { "epoch": 0.43719309361634723, "grad_norm": 0.0005275747971609235, "learning_rate": 1.645073616460741e-05, "loss": 0.0004, "step": 13800 }, { "epoch": 0.44036115951211785, "grad_norm": 0.0019433089764788747, "learning_rate": 1.6417387824521035e-05, "loss": 0.0008, "step": 13900 }, { "epoch": 0.44352922540788847, "grad_norm": 0.0017472075996920466, "learning_rate": 1.6384039484434664e-05, "loss": 0.001, "step": 14000 }, { "epoch": 0.4466972913036591, "grad_norm": 0.00724576273933053, "learning_rate": 1.6350691144348293e-05, "loss": 0.0009, "step": 14100 }, { "epoch": 0.44986535719942977, "grad_norm": 8.959687693277374e-05, "learning_rate": 1.631734280426192e-05, "loss": 0.0006, "step": 14200 }, { "epoch": 0.4530334230952004, "grad_norm": 0.0013277491088956594, "learning_rate": 1.6283994464175548e-05, "loss": 0.0012, "step": 14300 }, { "epoch": 0.456201488990971, "grad_norm": 0.0497293621301651, "learning_rate": 1.6250646124089174e-05, "loss": 0.0018, "step": 14400 }, { "epoch": 0.45936955488674164, "grad_norm": 0.003778190817683935, "learning_rate": 1.6217297784002803e-05, "loss": 0.0006, "step": 14500 }, { "epoch": 0.46253762078251226, "grad_norm": 0.022186335176229477, "learning_rate": 1.6183949443916432e-05, "loss": 0.0016, "step": 14600 }, { "epoch": 0.4657056866782829, "grad_norm": 8.587163392803632e-06, "learning_rate": 1.6150601103830058e-05, "loss": 0.0013, "step": 14700 }, { "epoch": 0.46887375257405356, "grad_norm": 0.1394946575164795, "learning_rate": 1.6117252763743687e-05, "loss": 0.0005, "step": 14800 }, { "epoch": 0.4720418184698242, "grad_norm": 0.06866718083620071, "learning_rate": 1.6083904423657313e-05, "loss": 0.0003, "step": 14900 }, { "epoch": 0.4752098843655948, "grad_norm": 0.0010026495438069105, "learning_rate": 1.6050556083570942e-05, "loss": 0.0007, "step": 15000 }, { "epoch": 0.4783779502613654, "grad_norm": 0.002642757259309292, "learning_rate": 1.6017207743484568e-05, "loss": 0.0009, "step": 15100 }, { "epoch": 0.48154601615713605, "grad_norm": 0.001733070588670671, "learning_rate": 1.5983859403398197e-05, "loss": 0.001, "step": 15200 }, { "epoch": 0.4847140820529067, "grad_norm": 7.795493729645386e-05, "learning_rate": 1.5950511063311826e-05, "loss": 0.002, "step": 15300 }, { "epoch": 0.48788214794867735, "grad_norm": 0.1361948698759079, "learning_rate": 1.591716272322545e-05, "loss": 0.0019, "step": 15400 }, { "epoch": 0.49105021384444797, "grad_norm": 0.0015777755761519074, "learning_rate": 1.588381438313908e-05, "loss": 0.0007, "step": 15500 }, { "epoch": 0.4942182797402186, "grad_norm": 0.0002617322898004204, "learning_rate": 1.5850466043052706e-05, "loss": 0.0006, "step": 15600 }, { "epoch": 0.4973863456359892, "grad_norm": 0.0005603536847047508, "learning_rate": 1.5817117702966336e-05, "loss": 0.0021, "step": 15700 }, { "epoch": 0.5005544115317598, "grad_norm": 0.007399669848382473, "learning_rate": 1.5783769362879965e-05, "loss": 0.0017, "step": 15800 }, { "epoch": 0.5037224774275305, "grad_norm": 0.0003950314421672374, "learning_rate": 1.575042102279359e-05, "loss": 0.0024, "step": 15900 }, { "epoch": 0.5068905433233011, "grad_norm": 0.00017947182641364634, "learning_rate": 1.571707268270722e-05, "loss": 0.0014, "step": 16000 }, { "epoch": 0.5100586092190718, "grad_norm": 0.010381902568042278, "learning_rate": 1.568372434262085e-05, "loss": 0.0035, "step": 16100 }, { "epoch": 0.5132266751148424, "grad_norm": 0.0018313485197722912, "learning_rate": 1.5650376002534474e-05, "loss": 0.0008, "step": 16200 }, { "epoch": 0.516394741010613, "grad_norm": 8.66261325427331e-05, "learning_rate": 1.5617027662448104e-05, "loss": 0.0009, "step": 16300 }, { "epoch": 0.5195628069063837, "grad_norm": 0.0049484893679618835, "learning_rate": 1.5583679322361733e-05, "loss": 0.0013, "step": 16400 }, { "epoch": 0.5227308728021542, "grad_norm": 0.0376594252884388, "learning_rate": 1.555033098227536e-05, "loss": 0.0022, "step": 16500 }, { "epoch": 0.5258989386979249, "grad_norm": 0.0036107038613408804, "learning_rate": 1.5516982642188988e-05, "loss": 0.0007, "step": 16600 }, { "epoch": 0.5290670045936956, "grad_norm": 0.0010309889912605286, "learning_rate": 1.5483634302102613e-05, "loss": 0.0002, "step": 16700 }, { "epoch": 0.5322350704894662, "grad_norm": 0.0025457690935581923, "learning_rate": 1.5450285962016243e-05, "loss": 0.0039, "step": 16800 }, { "epoch": 0.5354031363852368, "grad_norm": 6.187368126120418e-05, "learning_rate": 1.541693762192987e-05, "loss": 0.0005, "step": 16900 }, { "epoch": 0.5385712022810074, "grad_norm": 0.001785182161256671, "learning_rate": 1.5383589281843497e-05, "loss": 0.0011, "step": 17000 }, { "epoch": 0.5417392681767781, "grad_norm": 8.733890717849135e-05, "learning_rate": 1.5350240941757127e-05, "loss": 0.0025, "step": 17100 }, { "epoch": 0.5449073340725487, "grad_norm": 0.334231436252594, "learning_rate": 1.5316892601670752e-05, "loss": 0.0006, "step": 17200 }, { "epoch": 0.5480753999683193, "grad_norm": 0.0005896109505556524, "learning_rate": 1.528354426158438e-05, "loss": 0.0011, "step": 17300 }, { "epoch": 0.55124346586409, "grad_norm": 0.001895858091302216, "learning_rate": 1.5250195921498007e-05, "loss": 0.0004, "step": 17400 }, { "epoch": 0.5544115317598606, "grad_norm": 0.0020244682673364878, "learning_rate": 1.5216847581411636e-05, "loss": 0.0007, "step": 17500 }, { "epoch": 0.5575795976556313, "grad_norm": 0.005685487762093544, "learning_rate": 1.5183499241325264e-05, "loss": 0.0004, "step": 17600 }, { "epoch": 0.5607476635514018, "grad_norm": 0.0007782336906529963, "learning_rate": 1.5150150901238891e-05, "loss": 0.0085, "step": 17700 }, { "epoch": 0.5639157294471725, "grad_norm": 0.0003303899138700217, "learning_rate": 1.5116802561152519e-05, "loss": 0.0016, "step": 17800 }, { "epoch": 0.5670837953429432, "grad_norm": 0.004449727479368448, "learning_rate": 1.5083454221066148e-05, "loss": 0.0009, "step": 17900 }, { "epoch": 0.5702518612387137, "grad_norm": 0.002211131388321519, "learning_rate": 1.5050105880979775e-05, "loss": 0.0005, "step": 18000 }, { "epoch": 0.5734199271344844, "grad_norm": 0.00011551743227755651, "learning_rate": 1.5016757540893403e-05, "loss": 0.0009, "step": 18100 }, { "epoch": 0.576587993030255, "grad_norm": 2.151384796889033e-05, "learning_rate": 1.4983409200807032e-05, "loss": 0.0005, "step": 18200 }, { "epoch": 0.5797560589260257, "grad_norm": 0.1097760945558548, "learning_rate": 1.4950060860720659e-05, "loss": 0.0019, "step": 18300 }, { "epoch": 0.5829241248217963, "grad_norm": 0.004903141874819994, "learning_rate": 1.4916712520634287e-05, "loss": 0.0015, "step": 18400 }, { "epoch": 0.5860921907175669, "grad_norm": 0.0018493414390832186, "learning_rate": 1.4883364180547914e-05, "loss": 0.0008, "step": 18500 }, { "epoch": 0.5892602566133376, "grad_norm": 0.004849809221923351, "learning_rate": 1.4850015840461543e-05, "loss": 0.0011, "step": 18600 }, { "epoch": 0.5924283225091082, "grad_norm": 0.5303362607955933, "learning_rate": 1.481666750037517e-05, "loss": 0.0007, "step": 18700 }, { "epoch": 0.5955963884048788, "grad_norm": 0.01051903236657381, "learning_rate": 1.4783319160288798e-05, "loss": 0.0011, "step": 18800 }, { "epoch": 0.5987644543006495, "grad_norm": 0.001327801146544516, "learning_rate": 1.4749970820202425e-05, "loss": 0.0005, "step": 18900 }, { "epoch": 0.6019325201964201, "grad_norm": 0.002282701199874282, "learning_rate": 1.4716622480116055e-05, "loss": 0.0023, "step": 19000 }, { "epoch": 0.6051005860921908, "grad_norm": 0.000489629979711026, "learning_rate": 1.4683274140029682e-05, "loss": 0.0028, "step": 19100 }, { "epoch": 0.6082686519879613, "grad_norm": 0.09267735481262207, "learning_rate": 1.464992579994331e-05, "loss": 0.0008, "step": 19200 }, { "epoch": 0.611436717883732, "grad_norm": 6.615820893784985e-05, "learning_rate": 1.4616577459856937e-05, "loss": 0.0007, "step": 19300 }, { "epoch": 0.6146047837795026, "grad_norm": 0.01198121253401041, "learning_rate": 1.4583229119770566e-05, "loss": 0.0005, "step": 19400 }, { "epoch": 0.6177728496752732, "grad_norm": 0.0003829908964689821, "learning_rate": 1.4549880779684193e-05, "loss": 0.0003, "step": 19500 }, { "epoch": 0.6209409155710439, "grad_norm": 0.04535700008273125, "learning_rate": 1.451653243959782e-05, "loss": 0.0009, "step": 19600 }, { "epoch": 0.6241089814668145, "grad_norm": 0.17491640150547028, "learning_rate": 1.4483184099511447e-05, "loss": 0.0024, "step": 19700 }, { "epoch": 0.6272770473625852, "grad_norm": 0.0015510759549215436, "learning_rate": 1.4449835759425074e-05, "loss": 0.0008, "step": 19800 }, { "epoch": 0.6304451132583557, "grad_norm": 0.039857182651758194, "learning_rate": 1.4416487419338703e-05, "loss": 0.0016, "step": 19900 }, { "epoch": 0.6336131791541264, "grad_norm": 0.0024708157870918512, "learning_rate": 1.438313907925233e-05, "loss": 0.0007, "step": 20000 }, { "epoch": 0.6367812450498971, "grad_norm": 2.6672966480255127, "learning_rate": 1.4349790739165958e-05, "loss": 0.0227, "step": 20100 }, { "epoch": 0.6399493109456676, "grad_norm": 0.001121210865676403, "learning_rate": 1.4316442399079587e-05, "loss": 0.0007, "step": 20200 }, { "epoch": 0.6431173768414383, "grad_norm": 0.00026244454784318805, "learning_rate": 1.4283094058993215e-05, "loss": 0.0008, "step": 20300 }, { "epoch": 0.6462854427372089, "grad_norm": 0.008422612212598324, "learning_rate": 1.4249745718906842e-05, "loss": 0.0017, "step": 20400 }, { "epoch": 0.6494535086329796, "grad_norm": 0.003273066831752658, "learning_rate": 1.421639737882047e-05, "loss": 0.0025, "step": 20500 }, { "epoch": 0.6526215745287502, "grad_norm": 0.004011265933513641, "learning_rate": 1.4183049038734099e-05, "loss": 0.0005, "step": 20600 }, { "epoch": 0.6557896404245208, "grad_norm": 2.8203889087308198e-05, "learning_rate": 1.4149700698647726e-05, "loss": 0.0006, "step": 20700 }, { "epoch": 0.6589577063202915, "grad_norm": 0.028400663286447525, "learning_rate": 1.4116352358561354e-05, "loss": 0.0005, "step": 20800 }, { "epoch": 0.6621257722160621, "grad_norm": 0.005530569702386856, "learning_rate": 1.4083004018474981e-05, "loss": 0.0007, "step": 20900 }, { "epoch": 0.6652938381118327, "grad_norm": 0.0010687313042581081, "learning_rate": 1.404965567838861e-05, "loss": 0.0012, "step": 21000 }, { "epoch": 0.6684619040076034, "grad_norm": 0.02122735045850277, "learning_rate": 1.4016307338302238e-05, "loss": 0.0003, "step": 21100 }, { "epoch": 0.671629969903374, "grad_norm": 0.0008722911006771028, "learning_rate": 1.3982958998215865e-05, "loss": 0.0021, "step": 21200 }, { "epoch": 0.6747980357991447, "grad_norm": 0.06774666905403137, "learning_rate": 1.3949610658129492e-05, "loss": 0.0007, "step": 21300 }, { "epoch": 0.6779661016949152, "grad_norm": 0.000172132917214185, "learning_rate": 1.3916262318043122e-05, "loss": 0.0005, "step": 21400 }, { "epoch": 0.6811341675906859, "grad_norm": 0.0020967440214008093, "learning_rate": 1.3882913977956749e-05, "loss": 0.0004, "step": 21500 }, { "epoch": 0.6843022334864565, "grad_norm": 0.00023208513448480517, "learning_rate": 1.3849565637870376e-05, "loss": 0.0007, "step": 21600 }, { "epoch": 0.6874702993822271, "grad_norm": 0.0010145423002541065, "learning_rate": 1.3816217297784006e-05, "loss": 0.0007, "step": 21700 }, { "epoch": 0.6906383652779978, "grad_norm": 0.017040489241480827, "learning_rate": 1.3782868957697633e-05, "loss": 0.001, "step": 21800 }, { "epoch": 0.6938064311737684, "grad_norm": 0.0001387975353281945, "learning_rate": 1.374952061761126e-05, "loss": 0.001, "step": 21900 }, { "epoch": 0.6969744970695391, "grad_norm": 0.0011847980786114931, "learning_rate": 1.3716172277524886e-05, "loss": 0.0015, "step": 22000 }, { "epoch": 0.7001425629653096, "grad_norm": 0.01234695129096508, "learning_rate": 1.3682823937438514e-05, "loss": 0.0054, "step": 22100 }, { "epoch": 0.7033106288610803, "grad_norm": 0.07681901007890701, "learning_rate": 1.3649475597352143e-05, "loss": 0.0004, "step": 22200 }, { "epoch": 0.706478694756851, "grad_norm": 0.018233176320791245, "learning_rate": 1.361612725726577e-05, "loss": 0.0005, "step": 22300 }, { "epoch": 0.7096467606526216, "grad_norm": 0.07004215568304062, "learning_rate": 1.3582778917179398e-05, "loss": 0.0007, "step": 22400 }, { "epoch": 0.7128148265483922, "grad_norm": 0.0069395024329423904, "learning_rate": 1.3549430577093025e-05, "loss": 0.0006, "step": 22500 }, { "epoch": 0.7159828924441628, "grad_norm": 0.0007382028270512819, "learning_rate": 1.3516082237006654e-05, "loss": 0.0053, "step": 22600 }, { "epoch": 0.7191509583399335, "grad_norm": 0.03294491395354271, "learning_rate": 1.3482733896920282e-05, "loss": 0.0005, "step": 22700 }, { "epoch": 0.7223190242357042, "grad_norm": 0.22843457758426666, "learning_rate": 1.3449385556833909e-05, "loss": 0.001, "step": 22800 }, { "epoch": 0.7254870901314747, "grad_norm": 0.0002898550301324576, "learning_rate": 1.3416037216747537e-05, "loss": 0.0004, "step": 22900 }, { "epoch": 0.7286551560272454, "grad_norm": 0.0034780879504978657, "learning_rate": 1.3382688876661166e-05, "loss": 0.0016, "step": 23000 }, { "epoch": 0.731823221923016, "grad_norm": 0.04466221109032631, "learning_rate": 1.3349340536574793e-05, "loss": 0.0043, "step": 23100 }, { "epoch": 0.7349912878187866, "grad_norm": 9.875125215330627e-06, "learning_rate": 1.331599219648842e-05, "loss": 0.0019, "step": 23200 }, { "epoch": 0.7381593537145572, "grad_norm": 0.5125452876091003, "learning_rate": 1.3282643856402048e-05, "loss": 0.0011, "step": 23300 }, { "epoch": 0.7413274196103279, "grad_norm": 0.02045290358364582, "learning_rate": 1.3249295516315677e-05, "loss": 0.0004, "step": 23400 }, { "epoch": 0.7444954855060986, "grad_norm": 0.04965211823582649, "learning_rate": 1.3215947176229305e-05, "loss": 0.0008, "step": 23500 }, { "epoch": 0.7476635514018691, "grad_norm": 0.09520922601222992, "learning_rate": 1.3182598836142932e-05, "loss": 0.0023, "step": 23600 }, { "epoch": 0.7508316172976398, "grad_norm": 4.333815923018847e-06, "learning_rate": 1.3149250496056561e-05, "loss": 0.0005, "step": 23700 }, { "epoch": 0.7539996831934104, "grad_norm": 0.21383151412010193, "learning_rate": 1.3115902155970189e-05, "loss": 0.0006, "step": 23800 }, { "epoch": 0.757167749089181, "grad_norm": 0.0006424040184356272, "learning_rate": 1.3082553815883816e-05, "loss": 0.0004, "step": 23900 }, { "epoch": 0.7603358149849517, "grad_norm": 0.0008101545972749591, "learning_rate": 1.3049205475797443e-05, "loss": 0.0003, "step": 24000 }, { "epoch": 0.7635038808807223, "grad_norm": 0.0026161724235862494, "learning_rate": 1.3015857135711073e-05, "loss": 0.0006, "step": 24100 }, { "epoch": 0.766671946776493, "grad_norm": 9.565529762767255e-05, "learning_rate": 1.29825087956247e-05, "loss": 0.0009, "step": 24200 }, { "epoch": 0.7698400126722635, "grad_norm": 0.026133345440030098, "learning_rate": 1.2949160455538327e-05, "loss": 0.0055, "step": 24300 }, { "epoch": 0.7730080785680342, "grad_norm": 0.0001082066519302316, "learning_rate": 1.2915812115451953e-05, "loss": 0.0007, "step": 24400 }, { "epoch": 0.7761761444638049, "grad_norm": 0.0007577472133561969, "learning_rate": 1.288246377536558e-05, "loss": 0.001, "step": 24500 }, { "epoch": 0.7793442103595755, "grad_norm": 0.0003420517314225435, "learning_rate": 1.284911543527921e-05, "loss": 0.0014, "step": 24600 }, { "epoch": 0.7825122762553461, "grad_norm": 0.006726464722305536, "learning_rate": 1.2815767095192837e-05, "loss": 0.0008, "step": 24700 }, { "epoch": 0.7856803421511167, "grad_norm": 0.0014214670518413186, "learning_rate": 1.2782418755106465e-05, "loss": 0.0011, "step": 24800 }, { "epoch": 0.7888484080468874, "grad_norm": 0.006449591834098101, "learning_rate": 1.2749070415020092e-05, "loss": 0.0026, "step": 24900 }, { "epoch": 0.7920164739426581, "grad_norm": 0.0005304469959810376, "learning_rate": 1.2715722074933721e-05, "loss": 0.0026, "step": 25000 }, { "epoch": 0.7951845398384286, "grad_norm": 0.0006021055160090327, "learning_rate": 1.2682373734847349e-05, "loss": 0.0008, "step": 25100 }, { "epoch": 0.7983526057341993, "grad_norm": 0.0025042754132300615, "learning_rate": 1.2649025394760976e-05, "loss": 0.001, "step": 25200 }, { "epoch": 0.8015206716299699, "grad_norm": 0.013771702535450459, "learning_rate": 1.2615677054674605e-05, "loss": 0.0014, "step": 25300 }, { "epoch": 0.8046887375257405, "grad_norm": 0.007065094541758299, "learning_rate": 1.2582328714588233e-05, "loss": 0.0005, "step": 25400 }, { "epoch": 0.8078568034215111, "grad_norm": 0.0009276416385546327, "learning_rate": 1.254898037450186e-05, "loss": 0.0018, "step": 25500 }, { "epoch": 0.8110248693172818, "grad_norm": 0.026806719601154327, "learning_rate": 1.2515632034415487e-05, "loss": 0.0011, "step": 25600 }, { "epoch": 0.8141929352130525, "grad_norm": 0.00029596476815640926, "learning_rate": 1.2482283694329117e-05, "loss": 0.0036, "step": 25700 }, { "epoch": 0.817361001108823, "grad_norm": 0.0062362137250602245, "learning_rate": 1.2448935354242744e-05, "loss": 0.0009, "step": 25800 }, { "epoch": 0.8205290670045937, "grad_norm": 0.005555544979870319, "learning_rate": 1.2415587014156371e-05, "loss": 0.0019, "step": 25900 }, { "epoch": 0.8236971329003643, "grad_norm": 0.005463754292577505, "learning_rate": 1.2382238674069999e-05, "loss": 0.0014, "step": 26000 }, { "epoch": 0.826865198796135, "grad_norm": 0.0022091898135840893, "learning_rate": 1.2348890333983628e-05, "loss": 0.0005, "step": 26100 }, { "epoch": 0.8300332646919056, "grad_norm": 0.12676991522312164, "learning_rate": 1.2315541993897255e-05, "loss": 0.0014, "step": 26200 }, { "epoch": 0.8332013305876762, "grad_norm": 0.0015652469592168927, "learning_rate": 1.2282193653810883e-05, "loss": 0.0026, "step": 26300 }, { "epoch": 0.8363693964834469, "grad_norm": 0.00408145971596241, "learning_rate": 1.224884531372451e-05, "loss": 0.0011, "step": 26400 }, { "epoch": 0.8395374623792174, "grad_norm": 0.00045841402607038617, "learning_rate": 1.221549697363814e-05, "loss": 0.0216, "step": 26500 }, { "epoch": 0.8427055282749881, "grad_norm": 0.02451806142926216, "learning_rate": 1.2182148633551767e-05, "loss": 0.002, "step": 26600 }, { "epoch": 0.8458735941707588, "grad_norm": 0.00027516239788383245, "learning_rate": 1.2148800293465394e-05, "loss": 0.001, "step": 26700 }, { "epoch": 0.8490416600665294, "grad_norm": 1.4936133084120229e-05, "learning_rate": 1.211545195337902e-05, "loss": 0.0048, "step": 26800 }, { "epoch": 0.8522097259623, "grad_norm": 0.00033001156407408416, "learning_rate": 1.2082103613292648e-05, "loss": 0.0083, "step": 26900 }, { "epoch": 0.8553777918580706, "grad_norm": 0.017794223502278328, "learning_rate": 1.2048755273206277e-05, "loss": 0.0011, "step": 27000 }, { "epoch": 0.8585458577538413, "grad_norm": 0.0008482965640723705, "learning_rate": 1.2015406933119904e-05, "loss": 0.0054, "step": 27100 }, { "epoch": 0.8617139236496119, "grad_norm": 0.01870151422917843, "learning_rate": 1.1982058593033532e-05, "loss": 0.015, "step": 27200 }, { "epoch": 0.8648819895453825, "grad_norm": 10.855655670166016, "learning_rate": 1.194871025294716e-05, "loss": 0.003, "step": 27300 }, { "epoch": 0.8680500554411532, "grad_norm": 0.029217010363936424, "learning_rate": 1.1915361912860788e-05, "loss": 0.0024, "step": 27400 }, { "epoch": 0.8712181213369238, "grad_norm": 0.03195062279701233, "learning_rate": 1.1882013572774416e-05, "loss": 0.0014, "step": 27500 }, { "epoch": 0.8743861872326945, "grad_norm": 0.005607489962130785, "learning_rate": 1.1848665232688043e-05, "loss": 0.0029, "step": 27600 }, { "epoch": 0.877554253128465, "grad_norm": 0.0009394127409905195, "learning_rate": 1.1815316892601672e-05, "loss": 0.0041, "step": 27700 }, { "epoch": 0.8807223190242357, "grad_norm": 0.004952901508659124, "learning_rate": 1.17819685525153e-05, "loss": 0.0007, "step": 27800 }, { "epoch": 0.8838903849200064, "grad_norm": 0.0013988588470965624, "learning_rate": 1.1748620212428927e-05, "loss": 0.0019, "step": 27900 }, { "epoch": 0.8870584508157769, "grad_norm": 0.0029521535616368055, "learning_rate": 1.1715271872342554e-05, "loss": 0.001, "step": 28000 }, { "epoch": 0.8902265167115476, "grad_norm": 0.0014934735372662544, "learning_rate": 1.1681923532256184e-05, "loss": 0.0006, "step": 28100 }, { "epoch": 0.8933945826073182, "grad_norm": 0.12357547879219055, "learning_rate": 1.1648575192169811e-05, "loss": 0.0008, "step": 28200 }, { "epoch": 0.8965626485030889, "grad_norm": 0.0003371001803316176, "learning_rate": 1.1615226852083438e-05, "loss": 0.001, "step": 28300 }, { "epoch": 0.8997307143988595, "grad_norm": 0.01792779751121998, "learning_rate": 1.1581878511997066e-05, "loss": 0.0014, "step": 28400 }, { "epoch": 0.9028987802946301, "grad_norm": 0.031045174226164818, "learning_rate": 1.1548530171910695e-05, "loss": 0.0019, "step": 28500 }, { "epoch": 0.9060668461904008, "grad_norm": 0.031731363385915756, "learning_rate": 1.1515181831824322e-05, "loss": 0.0005, "step": 28600 }, { "epoch": 0.9092349120861714, "grad_norm": 0.003199261147528887, "learning_rate": 1.148183349173795e-05, "loss": 0.0021, "step": 28700 }, { "epoch": 0.912402977981942, "grad_norm": 4.737121344078332e-05, "learning_rate": 1.1448485151651579e-05, "loss": 0.0006, "step": 28800 }, { "epoch": 0.9155710438777127, "grad_norm": 0.012714708223938942, "learning_rate": 1.1415136811565206e-05, "loss": 0.0013, "step": 28900 }, { "epoch": 0.9187391097734833, "grad_norm": 0.00023968149616848677, "learning_rate": 1.1381788471478834e-05, "loss": 0.0013, "step": 29000 }, { "epoch": 0.921907175669254, "grad_norm": 0.00018393975915387273, "learning_rate": 1.1348440131392461e-05, "loss": 0.003, "step": 29100 }, { "epoch": 0.9250752415650245, "grad_norm": 31.17757225036621, "learning_rate": 1.1315091791306087e-05, "loss": 0.0073, "step": 29200 }, { "epoch": 0.9282433074607952, "grad_norm": 0.7764952182769775, "learning_rate": 1.1281743451219716e-05, "loss": 0.0006, "step": 29300 }, { "epoch": 0.9314113733565658, "grad_norm": 0.02078479342162609, "learning_rate": 1.1248395111133344e-05, "loss": 0.0055, "step": 29400 }, { "epoch": 0.9345794392523364, "grad_norm": 0.024362290278077126, "learning_rate": 1.1215046771046971e-05, "loss": 0.0015, "step": 29500 }, { "epoch": 0.9377475051481071, "grad_norm": 0.006054306868463755, "learning_rate": 1.1181698430960599e-05, "loss": 0.0006, "step": 29600 }, { "epoch": 0.9409155710438777, "grad_norm": 0.0014811428263783455, "learning_rate": 1.1148350090874228e-05, "loss": 0.0016, "step": 29700 }, { "epoch": 0.9440836369396484, "grad_norm": 0.00024473376106470823, "learning_rate": 1.1115001750787855e-05, "loss": 0.0048, "step": 29800 }, { "epoch": 0.9472517028354189, "grad_norm": 0.9019193053245544, "learning_rate": 1.1081653410701483e-05, "loss": 0.0012, "step": 29900 }, { "epoch": 0.9504197687311896, "grad_norm": 0.005399579647928476, "learning_rate": 1.104830507061511e-05, "loss": 0.0017, "step": 30000 }, { "epoch": 0.9535878346269603, "grad_norm": 9.696155757410452e-05, "learning_rate": 1.1014956730528739e-05, "loss": 0.001, "step": 30100 }, { "epoch": 0.9567559005227309, "grad_norm": 0.004988879431039095, "learning_rate": 1.0981608390442367e-05, "loss": 0.0009, "step": 30200 }, { "epoch": 0.9599239664185015, "grad_norm": 0.0014560155104845762, "learning_rate": 1.0948260050355994e-05, "loss": 0.0046, "step": 30300 }, { "epoch": 0.9630920323142721, "grad_norm": 0.0015756795182824135, "learning_rate": 1.0914911710269623e-05, "loss": 0.0019, "step": 30400 }, { "epoch": 0.9662600982100428, "grad_norm": 0.0033032239880412817, "learning_rate": 1.088156337018325e-05, "loss": 0.0034, "step": 30500 }, { "epoch": 0.9694281641058135, "grad_norm": 0.001508047222159803, "learning_rate": 1.0848215030096878e-05, "loss": 0.0015, "step": 30600 }, { "epoch": 0.972596230001584, "grad_norm": 0.005174440797418356, "learning_rate": 1.0814866690010505e-05, "loss": 0.0007, "step": 30700 }, { "epoch": 0.9757642958973547, "grad_norm": 0.08969740569591522, "learning_rate": 1.0781518349924135e-05, "loss": 0.0031, "step": 30800 }, { "epoch": 0.9789323617931253, "grad_norm": 1.643660471017938e-05, "learning_rate": 1.0748170009837762e-05, "loss": 0.0039, "step": 30900 }, { "epoch": 0.9821004276888959, "grad_norm": 0.0014628027565777302, "learning_rate": 1.071482166975139e-05, "loss": 0.0011, "step": 31000 }, { "epoch": 0.9852684935846666, "grad_norm": 0.00014162520528770983, "learning_rate": 1.0681473329665017e-05, "loss": 0.0014, "step": 31100 }, { "epoch": 0.9884365594804372, "grad_norm": 0.0008995328098535538, "learning_rate": 1.0648124989578646e-05, "loss": 0.0015, "step": 31200 }, { "epoch": 0.9916046253762079, "grad_norm": 0.000977379735559225, "learning_rate": 1.0614776649492273e-05, "loss": 0.0018, "step": 31300 }, { "epoch": 0.9947726912719784, "grad_norm": 0.0006964199710637331, "learning_rate": 1.0581428309405901e-05, "loss": 0.0349, "step": 31400 }, { "epoch": 0.9979407571677491, "grad_norm": 0.001510333619080484, "learning_rate": 1.0548079969319528e-05, "loss": 0.0007, "step": 31500 }, { "epoch": 1.0011088230635197, "grad_norm": 0.0027738306671380997, "learning_rate": 1.0514731629233154e-05, "loss": 0.0014, "step": 31600 }, { "epoch": 1.0042768889592903, "grad_norm": 0.0002610177034512162, "learning_rate": 1.0481383289146783e-05, "loss": 0.0024, "step": 31700 }, { "epoch": 1.007444954855061, "grad_norm": 0.0035766460932791233, "learning_rate": 1.044803494906041e-05, "loss": 0.0013, "step": 31800 }, { "epoch": 1.0106130207508317, "grad_norm": 0.001029430190101266, "learning_rate": 1.0414686608974038e-05, "loss": 0.0084, "step": 31900 }, { "epoch": 1.0137810866466022, "grad_norm": 0.0004998042713850737, "learning_rate": 1.0381338268887665e-05, "loss": 0.0016, "step": 32000 }, { "epoch": 1.0169491525423728, "grad_norm": 0.03187868371605873, "learning_rate": 1.0347989928801295e-05, "loss": 0.0007, "step": 32100 }, { "epoch": 1.0201172184381435, "grad_norm": 0.0006562778726220131, "learning_rate": 1.0314641588714922e-05, "loss": 0.0018, "step": 32200 }, { "epoch": 1.0232852843339142, "grad_norm": 0.0015058261342346668, "learning_rate": 1.028129324862855e-05, "loss": 0.0017, "step": 32300 }, { "epoch": 1.0264533502296849, "grad_norm": 0.00016952259466052055, "learning_rate": 1.0247944908542179e-05, "loss": 0.0017, "step": 32400 }, { "epoch": 1.0296214161254553, "grad_norm": 7.879018994572107e-06, "learning_rate": 1.0214596568455806e-05, "loss": 0.0008, "step": 32500 }, { "epoch": 1.032789482021226, "grad_norm": 0.006021997891366482, "learning_rate": 1.0181248228369433e-05, "loss": 0.0012, "step": 32600 }, { "epoch": 1.0359575479169967, "grad_norm": 0.2344316691160202, "learning_rate": 1.0147899888283061e-05, "loss": 0.0028, "step": 32700 }, { "epoch": 1.0391256138127674, "grad_norm": 0.002558755222707987, "learning_rate": 1.011455154819669e-05, "loss": 0.0004, "step": 32800 }, { "epoch": 1.042293679708538, "grad_norm": 0.31538712978363037, "learning_rate": 1.0081203208110317e-05, "loss": 0.0007, "step": 32900 }, { "epoch": 1.0454617456043085, "grad_norm": 0.00013157808280084282, "learning_rate": 1.0047854868023945e-05, "loss": 0.0002, "step": 33000 }, { "epoch": 1.0486298115000792, "grad_norm": 0.0020028562285006046, "learning_rate": 1.0014506527937572e-05, "loss": 0.0013, "step": 33100 }, { "epoch": 1.0517978773958498, "grad_norm": 0.266053169965744, "learning_rate": 9.981158187851201e-06, "loss": 0.0013, "step": 33200 }, { "epoch": 1.0549659432916205, "grad_norm": 0.0006200580974109471, "learning_rate": 9.947809847764829e-06, "loss": 0.0009, "step": 33300 }, { "epoch": 1.0581340091873912, "grad_norm": 0.0010163384722545743, "learning_rate": 9.914461507678456e-06, "loss": 0.0004, "step": 33400 }, { "epoch": 1.0613020750831617, "grad_norm": 0.0007398608140647411, "learning_rate": 9.881113167592084e-06, "loss": 0.0006, "step": 33500 }, { "epoch": 1.0644701409789323, "grad_norm": 5.2672654419438913e-05, "learning_rate": 9.847764827505711e-06, "loss": 0.0003, "step": 33600 }, { "epoch": 1.067638206874703, "grad_norm": 0.0007165202987380326, "learning_rate": 9.814416487419339e-06, "loss": 0.0014, "step": 33700 }, { "epoch": 1.0708062727704737, "grad_norm": 0.012463639490306377, "learning_rate": 9.781068147332968e-06, "loss": 0.0006, "step": 33800 }, { "epoch": 1.0739743386662441, "grad_norm": 0.0006154962466098368, "learning_rate": 9.747719807246595e-06, "loss": 0.0002, "step": 33900 }, { "epoch": 1.0771424045620148, "grad_norm": 0.018396640196442604, "learning_rate": 9.714371467160223e-06, "loss": 0.0004, "step": 34000 }, { "epoch": 1.0803104704577855, "grad_norm": 0.0008099168189801276, "learning_rate": 9.68102312707385e-06, "loss": 0.0008, "step": 34100 }, { "epoch": 1.0834785363535562, "grad_norm": 0.01304282434284687, "learning_rate": 9.64767478698748e-06, "loss": 0.0004, "step": 34200 }, { "epoch": 1.0866466022493269, "grad_norm": 1.1287897825241089, "learning_rate": 9.614326446901107e-06, "loss": 0.0005, "step": 34300 }, { "epoch": 1.0898146681450975, "grad_norm": 0.0009092055261135101, "learning_rate": 9.580978106814734e-06, "loss": 0.0003, "step": 34400 }, { "epoch": 1.092982734040868, "grad_norm": 1.1557484867807943e-05, "learning_rate": 9.547629766728362e-06, "loss": 0.0001, "step": 34500 }, { "epoch": 1.0961507999366387, "grad_norm": 0.0008222841424867511, "learning_rate": 9.514281426641989e-06, "loss": 0.0012, "step": 34600 }, { "epoch": 1.0993188658324093, "grad_norm": 0.0022294274531304836, "learning_rate": 9.480933086555616e-06, "loss": 0.0058, "step": 34700 }, { "epoch": 1.10248693172818, "grad_norm": 0.009762086905539036, "learning_rate": 9.447584746469246e-06, "loss": 0.0006, "step": 34800 }, { "epoch": 1.1056549976239505, "grad_norm": 0.7700905799865723, "learning_rate": 9.414236406382873e-06, "loss": 0.002, "step": 34900 }, { "epoch": 1.1088230635197212, "grad_norm": 0.0003761460247915238, "learning_rate": 9.3808880662965e-06, "loss": 0.0003, "step": 35000 }, { "epoch": 1.1119911294154918, "grad_norm": 0.031011968851089478, "learning_rate": 9.347539726210128e-06, "loss": 0.0002, "step": 35100 }, { "epoch": 1.1151591953112625, "grad_norm": 5.691965634468943e-05, "learning_rate": 9.314191386123757e-06, "loss": 0.0012, "step": 35200 }, { "epoch": 1.1183272612070332, "grad_norm": 0.002426894148811698, "learning_rate": 9.280843046037384e-06, "loss": 0.0003, "step": 35300 }, { "epoch": 1.1214953271028036, "grad_norm": 8.327289106091484e-05, "learning_rate": 9.247494705951012e-06, "loss": 0.0008, "step": 35400 }, { "epoch": 1.1246633929985743, "grad_norm": 0.0014542491408064961, "learning_rate": 9.21414636586464e-06, "loss": 0.0003, "step": 35500 }, { "epoch": 1.127831458894345, "grad_norm": 0.00029037665808573365, "learning_rate": 9.180798025778268e-06, "loss": 0.0006, "step": 35600 }, { "epoch": 1.1309995247901157, "grad_norm": 0.00022046563390176743, "learning_rate": 9.147449685691896e-06, "loss": 0.0003, "step": 35700 }, { "epoch": 1.1341675906858864, "grad_norm": 0.0006549390382133424, "learning_rate": 9.114101345605523e-06, "loss": 0.0002, "step": 35800 }, { "epoch": 1.1373356565816568, "grad_norm": 7.389521488221362e-05, "learning_rate": 9.08075300551915e-06, "loss": 0.0004, "step": 35900 }, { "epoch": 1.1405037224774275, "grad_norm": 0.03006296418607235, "learning_rate": 9.047404665432778e-06, "loss": 0.0004, "step": 36000 }, { "epoch": 1.1436717883731982, "grad_norm": 0.0006366446614265442, "learning_rate": 9.014056325346406e-06, "loss": 0.0003, "step": 36100 }, { "epoch": 1.1468398542689688, "grad_norm": 1.6421807231381536e-05, "learning_rate": 8.980707985260035e-06, "loss": 0.0011, "step": 36200 }, { "epoch": 1.1500079201647395, "grad_norm": 0.0005404766998253763, "learning_rate": 8.947359645173662e-06, "loss": 0.0003, "step": 36300 }, { "epoch": 1.15317598606051, "grad_norm": 4.2021845729323104e-05, "learning_rate": 8.91401130508729e-06, "loss": 0.0005, "step": 36400 }, { "epoch": 1.1563440519562806, "grad_norm": 0.224160835146904, "learning_rate": 8.880662965000919e-06, "loss": 0.0006, "step": 36500 }, { "epoch": 1.1595121178520513, "grad_norm": 0.00010083234519697726, "learning_rate": 8.847314624914546e-06, "loss": 0.0004, "step": 36600 }, { "epoch": 1.162680183747822, "grad_norm": 0.00012005392636638135, "learning_rate": 8.813966284828174e-06, "loss": 0.0002, "step": 36700 }, { "epoch": 1.1658482496435927, "grad_norm": 0.00014081050176173449, "learning_rate": 8.780617944741801e-06, "loss": 0.0002, "step": 36800 }, { "epoch": 1.1690163155393631, "grad_norm": 0.0011462682159617543, "learning_rate": 8.74726960465543e-06, "loss": 0.0003, "step": 36900 }, { "epoch": 1.1721843814351338, "grad_norm": 0.0005480795516632497, "learning_rate": 8.713921264569056e-06, "loss": 0.0008, "step": 37000 }, { "epoch": 1.1753524473309045, "grad_norm": 0.00025665463181212544, "learning_rate": 8.680572924482683e-06, "loss": 0.0003, "step": 37100 }, { "epoch": 1.1785205132266752, "grad_norm": 0.0019408657681196928, "learning_rate": 8.647224584396313e-06, "loss": 0.0004, "step": 37200 }, { "epoch": 1.1816885791224458, "grad_norm": 8.986064131022431e-06, "learning_rate": 8.61387624430994e-06, "loss": 0.0003, "step": 37300 }, { "epoch": 1.1848566450182163, "grad_norm": 0.008489036932587624, "learning_rate": 8.580527904223567e-06, "loss": 0.0004, "step": 37400 }, { "epoch": 1.188024710913987, "grad_norm": 0.0009591535781510174, "learning_rate": 8.547179564137197e-06, "loss": 0.0004, "step": 37500 }, { "epoch": 1.1911927768097577, "grad_norm": 0.009112311527132988, "learning_rate": 8.513831224050824e-06, "loss": 0.0004, "step": 37600 }, { "epoch": 1.1943608427055283, "grad_norm": 0.9046971797943115, "learning_rate": 8.480482883964451e-06, "loss": 0.0004, "step": 37700 }, { "epoch": 1.1975289086012988, "grad_norm": 0.00033651836565695703, "learning_rate": 8.447134543878079e-06, "loss": 0.0009, "step": 37800 }, { "epoch": 1.2006969744970695, "grad_norm": 0.03974246233701706, "learning_rate": 8.413786203791708e-06, "loss": 0.0005, "step": 37900 }, { "epoch": 1.2038650403928401, "grad_norm": 5.420077286544256e-05, "learning_rate": 8.380437863705335e-06, "loss": 0.0001, "step": 38000 }, { "epoch": 1.2070331062886108, "grad_norm": 4.04864113079384e-06, "learning_rate": 8.347089523618963e-06, "loss": 0.0001, "step": 38100 }, { "epoch": 1.2102011721843815, "grad_norm": 0.011167597025632858, "learning_rate": 8.31374118353259e-06, "loss": 0.0004, "step": 38200 }, { "epoch": 1.2133692380801522, "grad_norm": 0.0032947207801043987, "learning_rate": 8.280392843446218e-06, "loss": 0.0003, "step": 38300 }, { "epoch": 1.2165373039759226, "grad_norm": 0.003191061317920685, "learning_rate": 8.247044503359845e-06, "loss": 0.0004, "step": 38400 }, { "epoch": 1.2197053698716933, "grad_norm": 0.0017466794233769178, "learning_rate": 8.213696163273474e-06, "loss": 0.0003, "step": 38500 }, { "epoch": 1.222873435767464, "grad_norm": 0.000900625775102526, "learning_rate": 8.180347823187102e-06, "loss": 0.0025, "step": 38600 }, { "epoch": 1.2260415016632347, "grad_norm": 0.015701597556471825, "learning_rate": 8.14699948310073e-06, "loss": 0.0013, "step": 38700 }, { "epoch": 1.2292095675590051, "grad_norm": 8.967128087533638e-05, "learning_rate": 8.113651143014357e-06, "loss": 0.0002, "step": 38800 }, { "epoch": 1.2323776334547758, "grad_norm": 0.23212437331676483, "learning_rate": 8.080302802927986e-06, "loss": 0.0002, "step": 38900 }, { "epoch": 1.2355456993505465, "grad_norm": 0.001443861285224557, "learning_rate": 8.046954462841613e-06, "loss": 0.0006, "step": 39000 }, { "epoch": 1.2387137652463172, "grad_norm": 0.32768702507019043, "learning_rate": 8.01360612275524e-06, "loss": 0.0002, "step": 39100 }, { "epoch": 1.2418818311420878, "grad_norm": 0.13200968503952026, "learning_rate": 7.980257782668868e-06, "loss": 0.0003, "step": 39200 }, { "epoch": 1.2450498970378585, "grad_norm": 0.041544314473867416, "learning_rate": 7.946909442582497e-06, "loss": 0.0003, "step": 39300 }, { "epoch": 1.248217962933629, "grad_norm": 0.003154648235067725, "learning_rate": 7.913561102496123e-06, "loss": 0.0003, "step": 39400 }, { "epoch": 1.2513860288293996, "grad_norm": 7.159214146668091e-05, "learning_rate": 7.880212762409752e-06, "loss": 0.0003, "step": 39500 }, { "epoch": 1.2545540947251703, "grad_norm": 0.005242425017058849, "learning_rate": 7.84686442232338e-06, "loss": 0.0005, "step": 39600 }, { "epoch": 1.257722160620941, "grad_norm": 0.12032686918973923, "learning_rate": 7.813516082237007e-06, "loss": 0.0002, "step": 39700 }, { "epoch": 1.2608902265167115, "grad_norm": 3.002350786118768e-05, "learning_rate": 7.780167742150634e-06, "loss": 0.0003, "step": 39800 }, { "epoch": 1.2640582924124821, "grad_norm": 1.667384094616864e-05, "learning_rate": 7.746819402064264e-06, "loss": 0.0003, "step": 39900 }, { "epoch": 1.2672263583082528, "grad_norm": 0.004214556887745857, "learning_rate": 7.713471061977891e-06, "loss": 0.0002, "step": 40000 }, { "epoch": 1.2703944242040235, "grad_norm": 0.00022098551562521607, "learning_rate": 7.680122721891518e-06, "loss": 0.0002, "step": 40100 }, { "epoch": 1.2735624900997942, "grad_norm": 0.0006105100037530065, "learning_rate": 7.646774381805146e-06, "loss": 0.0003, "step": 40200 }, { "epoch": 1.2767305559955648, "grad_norm": 0.0006831226055510342, "learning_rate": 7.613426041718774e-06, "loss": 0.0001, "step": 40300 }, { "epoch": 1.2798986218913353, "grad_norm": 0.00035824175574816763, "learning_rate": 7.580077701632402e-06, "loss": 0.001, "step": 40400 }, { "epoch": 1.283066687787106, "grad_norm": 0.0009726459975354373, "learning_rate": 7.54672936154603e-06, "loss": 0.0002, "step": 40500 }, { "epoch": 1.2862347536828767, "grad_norm": 3.4098738979082555e-05, "learning_rate": 7.513381021459657e-06, "loss": 0.0003, "step": 40600 }, { "epoch": 1.289402819578647, "grad_norm": 0.0016234411159530282, "learning_rate": 7.480032681373285e-06, "loss": 0.0004, "step": 40700 }, { "epoch": 1.2925708854744178, "grad_norm": 0.019855381920933723, "learning_rate": 7.446684341286913e-06, "loss": 0.0004, "step": 40800 }, { "epoch": 1.2957389513701885, "grad_norm": 0.010516811162233353, "learning_rate": 7.41333600120054e-06, "loss": 0.0002, "step": 40900 }, { "epoch": 1.2989070172659591, "grad_norm": 0.1184462159872055, "learning_rate": 7.379987661114169e-06, "loss": 0.0006, "step": 41000 }, { "epoch": 1.3020750831617298, "grad_norm": 3.537457814672962e-05, "learning_rate": 7.346639321027796e-06, "loss": 0.0002, "step": 41100 }, { "epoch": 1.3052431490575005, "grad_norm": 0.0002039948885794729, "learning_rate": 7.313290980941424e-06, "loss": 0.0002, "step": 41200 }, { "epoch": 1.308411214953271, "grad_norm": 0.06637877225875854, "learning_rate": 7.279942640855052e-06, "loss": 0.0003, "step": 41300 }, { "epoch": 1.3115792808490416, "grad_norm": 0.06105900555849075, "learning_rate": 7.24659430076868e-06, "loss": 0.0002, "step": 41400 }, { "epoch": 1.3147473467448123, "grad_norm": 0.0005946651799604297, "learning_rate": 7.213245960682308e-06, "loss": 0.0012, "step": 41500 }, { "epoch": 1.317915412640583, "grad_norm": 0.00044926407281309366, "learning_rate": 7.179897620595936e-06, "loss": 0.0005, "step": 41600 }, { "epoch": 1.3210834785363534, "grad_norm": 0.0034730539191514254, "learning_rate": 7.146549280509564e-06, "loss": 0.0022, "step": 41700 }, { "epoch": 1.3242515444321241, "grad_norm": 0.00045728174154646695, "learning_rate": 7.113200940423191e-06, "loss": 0.0002, "step": 41800 }, { "epoch": 1.3274196103278948, "grad_norm": 0.029374372214078903, "learning_rate": 7.079852600336818e-06, "loss": 0.0007, "step": 41900 }, { "epoch": 1.3305876762236655, "grad_norm": 0.010232986882328987, "learning_rate": 7.0465042602504465e-06, "loss": 0.0003, "step": 42000 }, { "epoch": 1.3337557421194361, "grad_norm": 0.00016134929319377989, "learning_rate": 7.013155920164074e-06, "loss": 0.0002, "step": 42100 }, { "epoch": 1.3369238080152068, "grad_norm": 0.0003897528804372996, "learning_rate": 6.979807580077702e-06, "loss": 0.0002, "step": 42200 }, { "epoch": 1.3400918739109773, "grad_norm": 0.0017471498576924205, "learning_rate": 6.94645923999133e-06, "loss": 0.0002, "step": 42300 }, { "epoch": 1.343259939806748, "grad_norm": 0.017642421647906303, "learning_rate": 6.913110899904958e-06, "loss": 0.0001, "step": 42400 }, { "epoch": 1.3464280057025186, "grad_norm": 0.0010317267151549459, "learning_rate": 6.879762559818585e-06, "loss": 0.0002, "step": 42500 }, { "epoch": 1.3495960715982893, "grad_norm": 0.029274389147758484, "learning_rate": 6.846414219732214e-06, "loss": 0.0005, "step": 42600 }, { "epoch": 1.3527641374940598, "grad_norm": 0.0007354663102887571, "learning_rate": 6.813065879645842e-06, "loss": 0.0001, "step": 42700 }, { "epoch": 1.3559322033898304, "grad_norm": 0.00034641989623196423, "learning_rate": 6.779717539559469e-06, "loss": 0.0002, "step": 42800 }, { "epoch": 1.3591002692856011, "grad_norm": 0.149564728140831, "learning_rate": 6.746369199473098e-06, "loss": 0.0003, "step": 42900 }, { "epoch": 1.3622683351813718, "grad_norm": 0.011665324680507183, "learning_rate": 6.713020859386724e-06, "loss": 0.0004, "step": 43000 }, { "epoch": 1.3654364010771425, "grad_norm": 0.00013306832988746464, "learning_rate": 6.679672519300352e-06, "loss": 0.0002, "step": 43100 }, { "epoch": 1.3686044669729132, "grad_norm": 0.00031623971881344914, "learning_rate": 6.64632417921398e-06, "loss": 0.0004, "step": 43200 }, { "epoch": 1.3717725328686836, "grad_norm": 0.003858871292322874, "learning_rate": 6.612975839127607e-06, "loss": 0.0008, "step": 43300 }, { "epoch": 1.3749405987644543, "grad_norm": 0.00579412467777729, "learning_rate": 6.579627499041236e-06, "loss": 0.0002, "step": 43400 }, { "epoch": 1.378108664660225, "grad_norm": 3.0335993869812228e-05, "learning_rate": 6.546279158954864e-06, "loss": 0.0004, "step": 43500 }, { "epoch": 1.3812767305559956, "grad_norm": 0.0006304982816800475, "learning_rate": 6.512930818868491e-06, "loss": 0.0001, "step": 43600 }, { "epoch": 1.384444796451766, "grad_norm": 0.00015773455379530787, "learning_rate": 6.47958247878212e-06, "loss": 0.0001, "step": 43700 }, { "epoch": 1.3876128623475368, "grad_norm": 0.005809741094708443, "learning_rate": 6.446234138695747e-06, "loss": 0.009, "step": 43800 }, { "epoch": 1.3907809282433075, "grad_norm": 0.0010448688408359885, "learning_rate": 6.412885798609375e-06, "loss": 0.0002, "step": 43900 }, { "epoch": 1.3939489941390781, "grad_norm": 0.0030797335784882307, "learning_rate": 6.379537458523003e-06, "loss": 0.0002, "step": 44000 }, { "epoch": 1.3971170600348488, "grad_norm": 0.00012675885227508843, "learning_rate": 6.346189118436631e-06, "loss": 0.0003, "step": 44100 }, { "epoch": 1.4002851259306195, "grad_norm": 0.0043240697123110294, "learning_rate": 6.312840778350258e-06, "loss": 0.0003, "step": 44200 }, { "epoch": 1.40345319182639, "grad_norm": 0.00020247649808879942, "learning_rate": 6.279492438263885e-06, "loss": 0.0002, "step": 44300 }, { "epoch": 1.4066212577221606, "grad_norm": 0.0025763397570699453, "learning_rate": 6.2461440981775134e-06, "loss": 0.0004, "step": 44400 }, { "epoch": 1.4097893236179313, "grad_norm": 0.00025575104518793523, "learning_rate": 6.212795758091142e-06, "loss": 0.0006, "step": 44500 }, { "epoch": 1.4129573895137018, "grad_norm": 0.0006715962663292885, "learning_rate": 6.179447418004769e-06, "loss": 0.0002, "step": 44600 }, { "epoch": 1.4161254554094724, "grad_norm": 0.0005802169325761497, "learning_rate": 6.1460990779183974e-06, "loss": 0.0004, "step": 44700 }, { "epoch": 1.419293521305243, "grad_norm": 6.348552687995834e-06, "learning_rate": 6.112750737832025e-06, "loss": 0.0002, "step": 44800 }, { "epoch": 1.4224615872010138, "grad_norm": 0.000993837951682508, "learning_rate": 6.079402397745653e-06, "loss": 0.0002, "step": 44900 }, { "epoch": 1.4256296530967845, "grad_norm": 0.036865074187517166, "learning_rate": 6.046054057659281e-06, "loss": 0.0004, "step": 45000 }, { "epoch": 1.4287977189925551, "grad_norm": 0.09351787716150284, "learning_rate": 6.012705717572909e-06, "loss": 0.0003, "step": 45100 }, { "epoch": 1.4319657848883256, "grad_norm": 0.00022034939320292324, "learning_rate": 5.979357377486536e-06, "loss": 0.0002, "step": 45200 }, { "epoch": 1.4351338507840963, "grad_norm": 0.002382364822551608, "learning_rate": 5.946009037400165e-06, "loss": 0.0002, "step": 45300 }, { "epoch": 1.438301916679867, "grad_norm": 0.0010514174355193973, "learning_rate": 5.912660697313791e-06, "loss": 0.0001, "step": 45400 }, { "epoch": 1.4414699825756376, "grad_norm": 0.0006282671820372343, "learning_rate": 5.8793123572274195e-06, "loss": 0.0002, "step": 45500 }, { "epoch": 1.444638048471408, "grad_norm": 0.0001003501529339701, "learning_rate": 5.845964017141047e-06, "loss": 0.0002, "step": 45600 }, { "epoch": 1.4478061143671788, "grad_norm": 0.0019161907257512212, "learning_rate": 5.812615677054675e-06, "loss": 0.0002, "step": 45700 }, { "epoch": 1.4509741802629494, "grad_norm": 0.0002770457649603486, "learning_rate": 5.779267336968303e-06, "loss": 0.0004, "step": 45800 }, { "epoch": 1.4541422461587201, "grad_norm": 0.001281541888602078, "learning_rate": 5.745918996881931e-06, "loss": 0.0003, "step": 45900 }, { "epoch": 1.4573103120544908, "grad_norm": 0.028755199164152145, "learning_rate": 5.712570656795558e-06, "loss": 0.0002, "step": 46000 }, { "epoch": 1.4604783779502615, "grad_norm": 0.0007570263114757836, "learning_rate": 5.679222316709187e-06, "loss": 0.0003, "step": 46100 }, { "epoch": 1.463646443846032, "grad_norm": 0.00423109345138073, "learning_rate": 5.645873976622814e-06, "loss": 0.0001, "step": 46200 }, { "epoch": 1.4668145097418026, "grad_norm": 5.3798950830241665e-05, "learning_rate": 5.612525636536442e-06, "loss": 0.0003, "step": 46300 }, { "epoch": 1.4699825756375733, "grad_norm": 7.343962352024391e-05, "learning_rate": 5.57917729645007e-06, "loss": 0.0003, "step": 46400 }, { "epoch": 1.473150641533344, "grad_norm": 0.025961237028241158, "learning_rate": 5.545828956363698e-06, "loss": 0.0002, "step": 46500 }, { "epoch": 1.4763187074291144, "grad_norm": 0.00014132962678559124, "learning_rate": 5.512480616277325e-06, "loss": 0.0002, "step": 46600 }, { "epoch": 1.479486773324885, "grad_norm": 0.0044896723702549934, "learning_rate": 5.479132276190953e-06, "loss": 0.0002, "step": 46700 }, { "epoch": 1.4826548392206558, "grad_norm": 5.546275497181341e-05, "learning_rate": 5.44578393610458e-06, "loss": 0.0002, "step": 46800 }, { "epoch": 1.4858229051164265, "grad_norm": 0.0015877331607043743, "learning_rate": 5.412435596018209e-06, "loss": 0.0001, "step": 46900 }, { "epoch": 1.4889909710121971, "grad_norm": 8.771561260800809e-05, "learning_rate": 5.379087255931836e-06, "loss": 0.0001, "step": 47000 }, { "epoch": 1.4921590369079678, "grad_norm": 9.971875260816887e-05, "learning_rate": 5.345738915845464e-06, "loss": 0.0001, "step": 47100 }, { "epoch": 1.4953271028037383, "grad_norm": 6.665828550467268e-05, "learning_rate": 5.312390575759092e-06, "loss": 0.0001, "step": 47200 }, { "epoch": 1.498495168699509, "grad_norm": 0.12465495616197586, "learning_rate": 5.27904223567272e-06, "loss": 0.0003, "step": 47300 }, { "epoch": 1.5016632345952796, "grad_norm": 0.00016497267642989755, "learning_rate": 5.2456938955863476e-06, "loss": 0.0001, "step": 47400 }, { "epoch": 1.50483130049105, "grad_norm": 0.000793833751231432, "learning_rate": 5.212345555499976e-06, "loss": 0.0001, "step": 47500 }, { "epoch": 1.5079993663868207, "grad_norm": 0.004431690089404583, "learning_rate": 5.178997215413603e-06, "loss": 0.0001, "step": 47600 }, { "epoch": 1.5111674322825914, "grad_norm": 8.146934123942629e-05, "learning_rate": 5.1456488753272316e-06, "loss": 0.0004, "step": 47700 }, { "epoch": 1.514335498178362, "grad_norm": 0.0032630818895995617, "learning_rate": 5.112300535240858e-06, "loss": 0.0003, "step": 47800 }, { "epoch": 1.5175035640741328, "grad_norm": 3.78349454877025e-06, "learning_rate": 5.0789521951544864e-06, "loss": 0.0002, "step": 47900 }, { "epoch": 1.5206716299699035, "grad_norm": 7.793370605213568e-05, "learning_rate": 5.045603855068114e-06, "loss": 0.0002, "step": 48000 }, { "epoch": 1.5238396958656741, "grad_norm": 1.0998847756127361e-05, "learning_rate": 5.012255514981742e-06, "loss": 0.0002, "step": 48100 }, { "epoch": 1.5270077617614446, "grad_norm": 7.101731171132997e-05, "learning_rate": 4.97890717489537e-06, "loss": 0.0002, "step": 48200 }, { "epoch": 1.5301758276572153, "grad_norm": 0.04721503704786301, "learning_rate": 4.945558834808998e-06, "loss": 0.0005, "step": 48300 }, { "epoch": 1.533343893552986, "grad_norm": 0.04013681039214134, "learning_rate": 4.912210494722625e-06, "loss": 0.0002, "step": 48400 }, { "epoch": 1.5365119594487564, "grad_norm": 0.15584056079387665, "learning_rate": 4.878862154636254e-06, "loss": 0.0002, "step": 48500 }, { "epoch": 1.539680025344527, "grad_norm": 0.008085441775619984, "learning_rate": 4.845513814549881e-06, "loss": 0.0002, "step": 48600 }, { "epoch": 1.5428480912402978, "grad_norm": 0.00025607392308302224, "learning_rate": 4.8121654744635085e-06, "loss": 0.0001, "step": 48700 }, { "epoch": 1.5460161571360684, "grad_norm": 0.009352591820061207, "learning_rate": 4.778817134377137e-06, "loss": 0.0001, "step": 48800 }, { "epoch": 1.5491842230318391, "grad_norm": 0.00013986592239234596, "learning_rate": 4.745468794290764e-06, "loss": 0.0001, "step": 48900 }, { "epoch": 1.5523522889276098, "grad_norm": 0.0005525100277736783, "learning_rate": 4.7121204542043925e-06, "loss": 0.0002, "step": 49000 }, { "epoch": 1.5555203548233805, "grad_norm": 0.002130384324118495, "learning_rate": 4.678772114118021e-06, "loss": 0.0003, "step": 49100 }, { "epoch": 1.558688420719151, "grad_norm": 0.00036302325315773487, "learning_rate": 4.645423774031647e-06, "loss": 0.0002, "step": 49200 }, { "epoch": 1.5618564866149216, "grad_norm": 2.546385076129809e-05, "learning_rate": 4.612075433945276e-06, "loss": 0.0002, "step": 49300 }, { "epoch": 1.565024552510692, "grad_norm": 0.0013008471578359604, "learning_rate": 4.578727093858903e-06, "loss": 0.0002, "step": 49400 }, { "epoch": 1.5681926184064627, "grad_norm": 0.0018331869505345821, "learning_rate": 4.545378753772531e-06, "loss": 0.0002, "step": 49500 }, { "epoch": 1.5713606843022334, "grad_norm": 0.00046941745677031577, "learning_rate": 4.51203041368616e-06, "loss": 0.0001, "step": 49600 }, { "epoch": 1.574528750198004, "grad_norm": 0.09666042774915695, "learning_rate": 4.478682073599787e-06, "loss": 0.0001, "step": 49700 }, { "epoch": 1.5776968160937748, "grad_norm": 0.000714512774720788, "learning_rate": 4.4453337335134145e-06, "loss": 0.0002, "step": 49800 }, { "epoch": 1.5808648819895454, "grad_norm": 0.001266203005798161, "learning_rate": 4.411985393427042e-06, "loss": 0.0002, "step": 49900 }, { "epoch": 1.5840329478853161, "grad_norm": 0.00010851142724277452, "learning_rate": 4.37863705334067e-06, "loss": 0.0001, "step": 50000 }, { "epoch": 1.5872010137810868, "grad_norm": 0.02388921193778515, "learning_rate": 4.3452887132542985e-06, "loss": 0.0001, "step": 50100 }, { "epoch": 1.5903690796768573, "grad_norm": 0.004322574008256197, "learning_rate": 4.311940373167926e-06, "loss": 0.0001, "step": 50200 }, { "epoch": 1.593537145572628, "grad_norm": 0.0002055590011877939, "learning_rate": 4.278592033081554e-06, "loss": 0.0008, "step": 50300 }, { "epoch": 1.5967052114683984, "grad_norm": 0.006136850919574499, "learning_rate": 4.245243692995181e-06, "loss": 0.0001, "step": 50400 }, { "epoch": 1.599873277364169, "grad_norm": 0.03570784255862236, "learning_rate": 4.211895352908809e-06, "loss": 0.0002, "step": 50500 }, { "epoch": 1.6030413432599397, "grad_norm": 0.00013556861085817218, "learning_rate": 4.178547012822437e-06, "loss": 0.0002, "step": 50600 }, { "epoch": 1.6062094091557104, "grad_norm": 0.00039382753311656415, "learning_rate": 4.145198672736065e-06, "loss": 0.0, "step": 50700 }, { "epoch": 1.609377475051481, "grad_norm": 0.014802640303969383, "learning_rate": 4.111850332649693e-06, "loss": 0.0001, "step": 50800 }, { "epoch": 1.6125455409472518, "grad_norm": 0.002426808699965477, "learning_rate": 4.0785019925633206e-06, "loss": 0.0001, "step": 50900 }, { "epoch": 1.6157136068430225, "grad_norm": 0.0027719761710613966, "learning_rate": 4.045153652476948e-06, "loss": 0.0003, "step": 51000 }, { "epoch": 1.6188816727387931, "grad_norm": 0.0003508755180519074, "learning_rate": 4.011805312390576e-06, "loss": 0.0007, "step": 51100 }, { "epoch": 1.6220497386345636, "grad_norm": 0.00042187023791484535, "learning_rate": 3.978456972304204e-06, "loss": 0.0001, "step": 51200 }, { "epoch": 1.6252178045303343, "grad_norm": 0.00010520713840378448, "learning_rate": 3.945108632217832e-06, "loss": 0.0001, "step": 51300 }, { "epoch": 1.6283858704261047, "grad_norm": 2.475667861290276e-05, "learning_rate": 3.9117602921314594e-06, "loss": 0.0002, "step": 51400 }, { "epoch": 1.6315539363218754, "grad_norm": 0.00042487168684601784, "learning_rate": 3.878411952045088e-06, "loss": 0.0002, "step": 51500 }, { "epoch": 1.634722002217646, "grad_norm": 0.0007512226002290845, "learning_rate": 3.845063611958715e-06, "loss": 0.0003, "step": 51600 }, { "epoch": 1.6378900681134168, "grad_norm": 0.0007686197641305625, "learning_rate": 3.8117152718723426e-06, "loss": 0.0002, "step": 51700 }, { "epoch": 1.6410581340091874, "grad_norm": 0.006279453635215759, "learning_rate": 3.7783669317859705e-06, "loss": 0.0002, "step": 51800 }, { "epoch": 1.644226199904958, "grad_norm": 5.683067865902558e-05, "learning_rate": 3.7450185916995983e-06, "loss": 0.0001, "step": 51900 }, { "epoch": 1.6473942658007288, "grad_norm": 0.08361367881298065, "learning_rate": 3.711670251613226e-06, "loss": 0.0002, "step": 52000 }, { "epoch": 1.6505623316964992, "grad_norm": 1.9708577394485474, "learning_rate": 3.678321911526854e-06, "loss": 0.0004, "step": 52100 }, { "epoch": 1.65373039759227, "grad_norm": 0.01258891262114048, "learning_rate": 3.6449735714404815e-06, "loss": 0.0002, "step": 52200 }, { "epoch": 1.6568984634880406, "grad_norm": 1.2053630598529708e-05, "learning_rate": 3.6116252313541093e-06, "loss": 0.0001, "step": 52300 }, { "epoch": 1.660066529383811, "grad_norm": 0.007339359261095524, "learning_rate": 3.578276891267737e-06, "loss": 0.0006, "step": 52400 }, { "epoch": 1.6632345952795817, "grad_norm": 7.470462151104584e-05, "learning_rate": 3.544928551181365e-06, "loss": 0.0003, "step": 52500 }, { "epoch": 1.6664026611753524, "grad_norm": 0.001069075195118785, "learning_rate": 3.5115802110949933e-06, "loss": 0.0001, "step": 52600 }, { "epoch": 1.669570727071123, "grad_norm": 0.0014466423308476806, "learning_rate": 3.478231871008621e-06, "loss": 0.0001, "step": 52700 }, { "epoch": 1.6727387929668938, "grad_norm": 0.0008716689771972597, "learning_rate": 3.4448835309222482e-06, "loss": 0.0001, "step": 52800 }, { "epoch": 1.6759068588626644, "grad_norm": 0.0006247049896046519, "learning_rate": 3.411535190835876e-06, "loss": 0.0002, "step": 52900 }, { "epoch": 1.6790749247584351, "grad_norm": 0.0016816813731566072, "learning_rate": 3.378186850749504e-06, "loss": 0.0001, "step": 53000 }, { "epoch": 1.6822429906542056, "grad_norm": 0.015677401795983315, "learning_rate": 3.3448385106631322e-06, "loss": 0.0001, "step": 53100 }, { "epoch": 1.6854110565499762, "grad_norm": 0.0020264824852347374, "learning_rate": 3.31149017057676e-06, "loss": 0.0003, "step": 53200 }, { "epoch": 1.6885791224457467, "grad_norm": 0.006521924398839474, "learning_rate": 3.278141830490388e-06, "loss": 0.0002, "step": 53300 }, { "epoch": 1.6917471883415174, "grad_norm": 0.055716466158628464, "learning_rate": 3.244793490404015e-06, "loss": 0.0001, "step": 53400 }, { "epoch": 1.694915254237288, "grad_norm": 0.0009364295983687043, "learning_rate": 3.2114451503176432e-06, "loss": 0.0002, "step": 53500 }, { "epoch": 1.6980833201330587, "grad_norm": 0.026155732572078705, "learning_rate": 3.178096810231271e-06, "loss": 0.0001, "step": 53600 }, { "epoch": 1.7012513860288294, "grad_norm": 0.0009285922278650105, "learning_rate": 3.144748470144899e-06, "loss": 0.0009, "step": 53700 }, { "epoch": 1.7044194519246, "grad_norm": 0.00019248783064540476, "learning_rate": 3.111400130058527e-06, "loss": 0.0002, "step": 53800 }, { "epoch": 1.7075875178203708, "grad_norm": 0.008935322985053062, "learning_rate": 3.0780517899721547e-06, "loss": 0.0002, "step": 53900 }, { "epoch": 1.7107555837161414, "grad_norm": 3.448131974437274e-05, "learning_rate": 3.044703449885782e-06, "loss": 0.0002, "step": 54000 }, { "epoch": 1.713923649611912, "grad_norm": 0.0037920591421425343, "learning_rate": 3.01135510979941e-06, "loss": 0.0001, "step": 54100 }, { "epoch": 1.7170917155076826, "grad_norm": 0.0004557653737720102, "learning_rate": 2.978006769713038e-06, "loss": 0.0001, "step": 54200 }, { "epoch": 1.720259781403453, "grad_norm": 0.001680429675616324, "learning_rate": 2.9446584296266657e-06, "loss": 0.0002, "step": 54300 }, { "epoch": 1.7234278472992237, "grad_norm": 0.006457743234932423, "learning_rate": 2.9113100895402936e-06, "loss": 0.0001, "step": 54400 }, { "epoch": 1.7265959131949944, "grad_norm": 0.0018418490653857589, "learning_rate": 2.8779617494539214e-06, "loss": 0.0002, "step": 54500 }, { "epoch": 1.729763979090765, "grad_norm": 0.012908555567264557, "learning_rate": 2.844613409367549e-06, "loss": 0.0001, "step": 54600 }, { "epoch": 1.7329320449865357, "grad_norm": 0.0021234566811472178, "learning_rate": 2.8112650692811767e-06, "loss": 0.0002, "step": 54700 }, { "epoch": 1.7361001108823064, "grad_norm": 0.3285054862499237, "learning_rate": 2.7779167291948046e-06, "loss": 0.0001, "step": 54800 }, { "epoch": 1.739268176778077, "grad_norm": 0.0007893216679804027, "learning_rate": 2.7445683891084324e-06, "loss": 0.0001, "step": 54900 }, { "epoch": 1.7424362426738478, "grad_norm": 0.02494579553604126, "learning_rate": 2.7112200490220603e-06, "loss": 0.0002, "step": 55000 }, { "epoch": 1.7456043085696182, "grad_norm": 0.0023814570158720016, "learning_rate": 2.677871708935688e-06, "loss": 0.0002, "step": 55100 }, { "epoch": 1.748772374465389, "grad_norm": 0.0058886525221168995, "learning_rate": 2.6445233688493156e-06, "loss": 0.0002, "step": 55200 }, { "epoch": 1.7519404403611594, "grad_norm": 4.876612001680769e-05, "learning_rate": 2.6111750287629435e-06, "loss": 0.0002, "step": 55300 }, { "epoch": 1.75510850625693, "grad_norm": 0.010443676263093948, "learning_rate": 2.5778266886765713e-06, "loss": 0.0002, "step": 55400 }, { "epoch": 1.7582765721527007, "grad_norm": 0.01249265018850565, "learning_rate": 2.544478348590199e-06, "loss": 0.0001, "step": 55500 }, { "epoch": 1.7614446380484714, "grad_norm": 0.00021050056966487318, "learning_rate": 2.511130008503827e-06, "loss": 0.0002, "step": 55600 }, { "epoch": 1.764612703944242, "grad_norm": 0.01633504591882229, "learning_rate": 2.4777816684174545e-06, "loss": 0.0001, "step": 55700 }, { "epoch": 1.7677807698400128, "grad_norm": 3.676281266962178e-05, "learning_rate": 2.4444333283310823e-06, "loss": 0.0003, "step": 55800 }, { "epoch": 1.7709488357357834, "grad_norm": 0.002830359386280179, "learning_rate": 2.4110849882447106e-06, "loss": 0.0001, "step": 55900 }, { "epoch": 1.7741169016315539, "grad_norm": 0.0029975976794958115, "learning_rate": 2.377736648158338e-06, "loss": 0.0002, "step": 56000 }, { "epoch": 1.7772849675273246, "grad_norm": 0.005194125231355429, "learning_rate": 2.344388308071966e-06, "loss": 0.0001, "step": 56100 }, { "epoch": 1.7804530334230952, "grad_norm": 7.272951734194066e-06, "learning_rate": 2.3110399679855938e-06, "loss": 0.0003, "step": 56200 }, { "epoch": 1.7836210993188657, "grad_norm": 0.0007229465409182012, "learning_rate": 2.2776916278992216e-06, "loss": 0.0002, "step": 56300 }, { "epoch": 1.7867891652146364, "grad_norm": 0.0005428678123280406, "learning_rate": 2.2443432878128495e-06, "loss": 0.0004, "step": 56400 }, { "epoch": 1.789957231110407, "grad_norm": 0.017834417521953583, "learning_rate": 2.2109949477264774e-06, "loss": 0.0002, "step": 56500 }, { "epoch": 1.7931252970061777, "grad_norm": 0.0003519799211062491, "learning_rate": 2.177646607640105e-06, "loss": 0.0002, "step": 56600 }, { "epoch": 1.7962933629019484, "grad_norm": 5.785848043160513e-05, "learning_rate": 2.1442982675537327e-06, "loss": 0.0001, "step": 56700 }, { "epoch": 1.799461428797719, "grad_norm": 0.00827944464981556, "learning_rate": 2.1109499274673605e-06, "loss": 0.0001, "step": 56800 }, { "epoch": 1.8026294946934898, "grad_norm": 8.96235360414721e-06, "learning_rate": 2.0776015873809884e-06, "loss": 0.0001, "step": 56900 }, { "epoch": 1.8057975605892602, "grad_norm": 8.50809519761242e-05, "learning_rate": 2.0442532472946162e-06, "loss": 0.0002, "step": 57000 }, { "epoch": 1.808965626485031, "grad_norm": 0.0564473532140255, "learning_rate": 2.010904907208244e-06, "loss": 0.0002, "step": 57100 }, { "epoch": 1.8121336923808016, "grad_norm": 0.0019106407416984439, "learning_rate": 1.9775565671218715e-06, "loss": 0.0002, "step": 57200 }, { "epoch": 1.815301758276572, "grad_norm": 0.0011765076778829098, "learning_rate": 1.9442082270354994e-06, "loss": 0.0002, "step": 57300 }, { "epoch": 1.8184698241723427, "grad_norm": 0.002034899080172181, "learning_rate": 1.9108598869491273e-06, "loss": 0.0001, "step": 57400 }, { "epoch": 1.8216378900681134, "grad_norm": 2.886955189751461e-05, "learning_rate": 1.877511546862755e-06, "loss": 0.0003, "step": 57500 }, { "epoch": 1.824805955963884, "grad_norm": 0.0003313591005280614, "learning_rate": 1.844163206776383e-06, "loss": 0.0001, "step": 57600 }, { "epoch": 1.8279740218596547, "grad_norm": 0.0030703512020409107, "learning_rate": 1.8108148666900108e-06, "loss": 0.0001, "step": 57700 }, { "epoch": 1.8311420877554254, "grad_norm": 0.05901242792606354, "learning_rate": 1.7774665266036383e-06, "loss": 0.0002, "step": 57800 }, { "epoch": 1.834310153651196, "grad_norm": 0.00020717663574032485, "learning_rate": 1.7441181865172664e-06, "loss": 0.0002, "step": 57900 }, { "epoch": 1.8374782195469666, "grad_norm": 3.810102498391643e-05, "learning_rate": 1.7107698464308942e-06, "loss": 0.0007, "step": 58000 }, { "epoch": 1.8406462854427372, "grad_norm": 0.00012426413013599813, "learning_rate": 1.6774215063445219e-06, "loss": 0.0001, "step": 58100 }, { "epoch": 1.8438143513385077, "grad_norm": 0.00743386335670948, "learning_rate": 1.6440731662581497e-06, "loss": 0.0001, "step": 58200 }, { "epoch": 1.8469824172342784, "grad_norm": 0.04067447409033775, "learning_rate": 1.6107248261717776e-06, "loss": 0.0001, "step": 58300 }, { "epoch": 1.850150483130049, "grad_norm": 2.0743360437336378e-05, "learning_rate": 1.5773764860854052e-06, "loss": 0.0001, "step": 58400 }, { "epoch": 1.8533185490258197, "grad_norm": 0.006036572623997927, "learning_rate": 1.544028145999033e-06, "loss": 0.0001, "step": 58500 }, { "epoch": 1.8564866149215904, "grad_norm": 0.0008050315082073212, "learning_rate": 1.510679805912661e-06, "loss": 0.0001, "step": 58600 } ], "logging_steps": 100, "max_steps": 63130, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }