mordernBERT-multilingual-legal-1e / trainer_state.json
anhtuansh's picture
Upload 15 files
363622e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8564866149215904,
"eval_steps": 500,
"global_step": 58600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003168065895770632,
"grad_norm": 0.0008460358949378133,
"learning_rate": 6.335128286347799e-07,
"loss": 0.0007,
"step": 100
},
{
"epoch": 0.006336131791541264,
"grad_norm": 0.006260558497160673,
"learning_rate": 1.2670256572695599e-06,
"loss": 0.0004,
"step": 200
},
{
"epoch": 0.009504197687311896,
"grad_norm": 0.0002755998866632581,
"learning_rate": 1.9005384859043396e-06,
"loss": 0.0027,
"step": 300
},
{
"epoch": 0.012672263583082528,
"grad_norm": 0.004525843542069197,
"learning_rate": 2.5340513145391198e-06,
"loss": 0.003,
"step": 400
},
{
"epoch": 0.01584032947885316,
"grad_norm": 0.009563793428242207,
"learning_rate": 3.1675641431738997e-06,
"loss": 0.0002,
"step": 500
},
{
"epoch": 0.01900839537462379,
"grad_norm": 2.0679131921497174e-05,
"learning_rate": 3.801076971808679e-06,
"loss": 0.0025,
"step": 600
},
{
"epoch": 0.022176461270394424,
"grad_norm": 3.369219848536886e-05,
"learning_rate": 4.434589800443459e-06,
"loss": 0.0007,
"step": 700
},
{
"epoch": 0.025344527166165056,
"grad_norm": 0.03423422574996948,
"learning_rate": 5.0681026290782395e-06,
"loss": 0.0028,
"step": 800
},
{
"epoch": 0.02851259306193569,
"grad_norm": 0.028383228927850723,
"learning_rate": 5.701615457713019e-06,
"loss": 0.001,
"step": 900
},
{
"epoch": 0.03168065895770632,
"grad_norm": 8.241041359724477e-05,
"learning_rate": 6.335128286347799e-06,
"loss": 0.001,
"step": 1000
},
{
"epoch": 0.03484872485347695,
"grad_norm": 0.015536214224994183,
"learning_rate": 6.96864111498258e-06,
"loss": 0.0024,
"step": 1100
},
{
"epoch": 0.03801679074924758,
"grad_norm": 0.0755501538515091,
"learning_rate": 7.602153943617358e-06,
"loss": 0.0006,
"step": 1200
},
{
"epoch": 0.04118485664501822,
"grad_norm": 0.011010600253939629,
"learning_rate": 8.235666772252139e-06,
"loss": 0.0056,
"step": 1300
},
{
"epoch": 0.04435292254078885,
"grad_norm": 4.7160243411781266e-05,
"learning_rate": 8.869179600886918e-06,
"loss": 0.0037,
"step": 1400
},
{
"epoch": 0.04752098843655948,
"grad_norm": 0.04266763851046562,
"learning_rate": 9.502692429521698e-06,
"loss": 0.0026,
"step": 1500
},
{
"epoch": 0.05068905433233011,
"grad_norm": 0.03944120928645134,
"learning_rate": 1.0136205258156479e-05,
"loss": 0.0012,
"step": 1600
},
{
"epoch": 0.05385712022810074,
"grad_norm": 0.0018407816532999277,
"learning_rate": 1.0769718086791259e-05,
"loss": 0.0005,
"step": 1700
},
{
"epoch": 0.05702518612387138,
"grad_norm": 0.0038744837511330843,
"learning_rate": 1.1403230915426038e-05,
"loss": 0.0003,
"step": 1800
},
{
"epoch": 0.060193252019642006,
"grad_norm": 0.002657032571732998,
"learning_rate": 1.203674374406082e-05,
"loss": 0.0006,
"step": 1900
},
{
"epoch": 0.06336131791541263,
"grad_norm": 0.001354015665128827,
"learning_rate": 1.2670256572695599e-05,
"loss": 0.0004,
"step": 2000
},
{
"epoch": 0.06652938381118327,
"grad_norm": 0.03860320523381233,
"learning_rate": 1.3303769401330378e-05,
"loss": 0.0034,
"step": 2100
},
{
"epoch": 0.0696974497069539,
"grad_norm": 0.004643497057259083,
"learning_rate": 1.393728222996516e-05,
"loss": 0.0015,
"step": 2200
},
{
"epoch": 0.07286551560272454,
"grad_norm": 0.0005492149502970278,
"learning_rate": 1.4570795058599937e-05,
"loss": 0.0011,
"step": 2300
},
{
"epoch": 0.07603358149849516,
"grad_norm": 0.005505191162228584,
"learning_rate": 1.5204307887234717e-05,
"loss": 0.0006,
"step": 2400
},
{
"epoch": 0.0792016473942658,
"grad_norm": 0.00082977837882936,
"learning_rate": 1.58378207158695e-05,
"loss": 0.0015,
"step": 2500
},
{
"epoch": 0.08236971329003644,
"grad_norm": 6.766305159544572e-05,
"learning_rate": 1.6471333544504278e-05,
"loss": 0.0025,
"step": 2600
},
{
"epoch": 0.08553777918580706,
"grad_norm": 0.06128118559718132,
"learning_rate": 1.7104846373139055e-05,
"loss": 0.0006,
"step": 2700
},
{
"epoch": 0.0887058450815777,
"grad_norm": 0.43053677678108215,
"learning_rate": 1.7738359201773837e-05,
"loss": 0.0007,
"step": 2800
},
{
"epoch": 0.09187391097734833,
"grad_norm": 0.00012383765715640038,
"learning_rate": 1.8371872030408618e-05,
"loss": 0.0011,
"step": 2900
},
{
"epoch": 0.09504197687311897,
"grad_norm": 3.1947878596838564e-05,
"learning_rate": 1.9005384859043396e-05,
"loss": 0.0009,
"step": 3000
},
{
"epoch": 0.09821004276888959,
"grad_norm": 0.0004990168381482363,
"learning_rate": 1.9638897687678177e-05,
"loss": 0.0019,
"step": 3100
},
{
"epoch": 0.10137810866466022,
"grad_norm": 0.003429220989346504,
"learning_rate": 1.998566021376286e-05,
"loss": 0.0036,
"step": 3200
},
{
"epoch": 0.10454617456043086,
"grad_norm": 0.00011575384996831417,
"learning_rate": 1.995231187367649e-05,
"loss": 0.0047,
"step": 3300
},
{
"epoch": 0.10771424045620148,
"grad_norm": 0.001659040222875774,
"learning_rate": 1.9918963533590115e-05,
"loss": 0.0044,
"step": 3400
},
{
"epoch": 0.11088230635197212,
"grad_norm": 0.00041119763045571744,
"learning_rate": 1.9885615193503744e-05,
"loss": 0.0018,
"step": 3500
},
{
"epoch": 0.11405037224774275,
"grad_norm": 0.003299827454611659,
"learning_rate": 1.9852266853417373e-05,
"loss": 0.001,
"step": 3600
},
{
"epoch": 0.11721843814351339,
"grad_norm": 8.044855348998681e-05,
"learning_rate": 1.9818918513331e-05,
"loss": 0.0027,
"step": 3700
},
{
"epoch": 0.12038650403928401,
"grad_norm": 0.00031627726275473833,
"learning_rate": 1.9785570173244628e-05,
"loss": 0.0061,
"step": 3800
},
{
"epoch": 0.12355456993505465,
"grad_norm": 0.001032638712786138,
"learning_rate": 1.9752221833158257e-05,
"loss": 0.001,
"step": 3900
},
{
"epoch": 0.12672263583082527,
"grad_norm": 0.0009450612124055624,
"learning_rate": 1.9718873493071883e-05,
"loss": 0.0074,
"step": 4000
},
{
"epoch": 0.12989070172659592,
"grad_norm": 0.6000776290893555,
"learning_rate": 1.9685525152985512e-05,
"loss": 0.0061,
"step": 4100
},
{
"epoch": 0.13305876762236654,
"grad_norm": 0.1603873372077942,
"learning_rate": 1.965217681289914e-05,
"loss": 0.0015,
"step": 4200
},
{
"epoch": 0.13622683351813716,
"grad_norm": 0.0001428043033229187,
"learning_rate": 1.9618828472812767e-05,
"loss": 0.0024,
"step": 4300
},
{
"epoch": 0.1393948994139078,
"grad_norm": 0.042562585324048996,
"learning_rate": 1.9585480132726396e-05,
"loss": 0.0051,
"step": 4400
},
{
"epoch": 0.14256296530967844,
"grad_norm": 0.2654358744621277,
"learning_rate": 1.9552131792640022e-05,
"loss": 0.0036,
"step": 4500
},
{
"epoch": 0.14573103120544909,
"grad_norm": 0.0015032750088721514,
"learning_rate": 1.951878345255365e-05,
"loss": 0.0004,
"step": 4600
},
{
"epoch": 0.1488990971012197,
"grad_norm": 9.152581333182752e-05,
"learning_rate": 1.948543511246728e-05,
"loss": 0.0059,
"step": 4700
},
{
"epoch": 0.15206716299699033,
"grad_norm": 0.013867395929992199,
"learning_rate": 1.9452086772380906e-05,
"loss": 0.0009,
"step": 4800
},
{
"epoch": 0.15523522889276098,
"grad_norm": 0.018314680084586143,
"learning_rate": 1.9418738432294535e-05,
"loss": 0.001,
"step": 4900
},
{
"epoch": 0.1584032947885316,
"grad_norm": 0.17640839517116547,
"learning_rate": 1.9385390092208164e-05,
"loss": 0.0008,
"step": 5000
},
{
"epoch": 0.16157136068430222,
"grad_norm": 0.051024582237005234,
"learning_rate": 1.935204175212179e-05,
"loss": 0.0011,
"step": 5100
},
{
"epoch": 0.16473942658007287,
"grad_norm": 0.003037165617570281,
"learning_rate": 1.931869341203542e-05,
"loss": 0.0009,
"step": 5200
},
{
"epoch": 0.1679074924758435,
"grad_norm": 0.011393848806619644,
"learning_rate": 1.9285345071949045e-05,
"loss": 0.0087,
"step": 5300
},
{
"epoch": 0.17107555837161412,
"grad_norm": 0.0004570337769109756,
"learning_rate": 1.925199673186267e-05,
"loss": 0.0018,
"step": 5400
},
{
"epoch": 0.17424362426738477,
"grad_norm": 4.8830272135091946e-05,
"learning_rate": 1.92186483917763e-05,
"loss": 0.0071,
"step": 5500
},
{
"epoch": 0.1774116901631554,
"grad_norm": 0.3802063763141632,
"learning_rate": 1.918530005168993e-05,
"loss": 0.0021,
"step": 5600
},
{
"epoch": 0.18057975605892604,
"grad_norm": 0.067596435546875,
"learning_rate": 1.9151951711603555e-05,
"loss": 0.0009,
"step": 5700
},
{
"epoch": 0.18374782195469666,
"grad_norm": 0.0005965412128716707,
"learning_rate": 1.9118603371517184e-05,
"loss": 0.001,
"step": 5800
},
{
"epoch": 0.18691588785046728,
"grad_norm": 0.13070069253444672,
"learning_rate": 1.9085255031430813e-05,
"loss": 0.0005,
"step": 5900
},
{
"epoch": 0.19008395374623793,
"grad_norm": 0.020468149334192276,
"learning_rate": 1.905190669134444e-05,
"loss": 0.006,
"step": 6000
},
{
"epoch": 0.19325201964200855,
"grad_norm": 0.026921125128865242,
"learning_rate": 1.9018558351258068e-05,
"loss": 0.004,
"step": 6100
},
{
"epoch": 0.19642008553777918,
"grad_norm": 0.025172384455800056,
"learning_rate": 1.8985210011171697e-05,
"loss": 0.0007,
"step": 6200
},
{
"epoch": 0.19958815143354983,
"grad_norm": 0.0012900714064016938,
"learning_rate": 1.8951861671085323e-05,
"loss": 0.0005,
"step": 6300
},
{
"epoch": 0.20275621732932045,
"grad_norm": 0.007845859974622726,
"learning_rate": 1.8918513330998952e-05,
"loss": 0.0012,
"step": 6400
},
{
"epoch": 0.20592428322509107,
"grad_norm": 0.22305609285831451,
"learning_rate": 1.8885164990912578e-05,
"loss": 0.0008,
"step": 6500
},
{
"epoch": 0.20909234912086172,
"grad_norm": 0.0017252659890800714,
"learning_rate": 1.8851816650826207e-05,
"loss": 0.0151,
"step": 6600
},
{
"epoch": 0.21226041501663234,
"grad_norm": 0.03503908962011337,
"learning_rate": 1.8818468310739836e-05,
"loss": 0.0007,
"step": 6700
},
{
"epoch": 0.21542848091240296,
"grad_norm": 0.00017412351735401899,
"learning_rate": 1.878511997065346e-05,
"loss": 0.001,
"step": 6800
},
{
"epoch": 0.21859654680817361,
"grad_norm": 0.25898587703704834,
"learning_rate": 1.875177163056709e-05,
"loss": 0.001,
"step": 6900
},
{
"epoch": 0.22176461270394424,
"grad_norm": 0.00022779431310482323,
"learning_rate": 1.871842329048072e-05,
"loss": 0.0022,
"step": 7000
},
{
"epoch": 0.22493267859971489,
"grad_norm": 0.0009634292218834162,
"learning_rate": 1.8685074950394346e-05,
"loss": 0.0017,
"step": 7100
},
{
"epoch": 0.2281007444954855,
"grad_norm": 0.0021817036904394627,
"learning_rate": 1.8651726610307975e-05,
"loss": 0.0026,
"step": 7200
},
{
"epoch": 0.23126881039125613,
"grad_norm": 0.01804823987185955,
"learning_rate": 1.86183782702216e-05,
"loss": 0.0012,
"step": 7300
},
{
"epoch": 0.23443687628702678,
"grad_norm": 0.022183051332831383,
"learning_rate": 1.858502993013523e-05,
"loss": 0.0029,
"step": 7400
},
{
"epoch": 0.2376049421827974,
"grad_norm": 0.0007926349644549191,
"learning_rate": 1.855168159004886e-05,
"loss": 0.0006,
"step": 7500
},
{
"epoch": 0.24077300807856802,
"grad_norm": 0.0001486311521148309,
"learning_rate": 1.8518333249962484e-05,
"loss": 0.0019,
"step": 7600
},
{
"epoch": 0.24394107397433867,
"grad_norm": 0.11593101173639297,
"learning_rate": 1.848498490987611e-05,
"loss": 0.0049,
"step": 7700
},
{
"epoch": 0.2471091398701093,
"grad_norm": 0.06385669112205505,
"learning_rate": 1.845163656978974e-05,
"loss": 0.0018,
"step": 7800
},
{
"epoch": 0.2502772057658799,
"grad_norm": 0.001922784373164177,
"learning_rate": 1.841828822970337e-05,
"loss": 0.0045,
"step": 7900
},
{
"epoch": 0.25344527166165054,
"grad_norm": 0.0033132501412183046,
"learning_rate": 1.8384939889616994e-05,
"loss": 0.002,
"step": 8000
},
{
"epoch": 0.2566133375574212,
"grad_norm": 0.0015832999488338828,
"learning_rate": 1.8351591549530623e-05,
"loss": 0.0009,
"step": 8100
},
{
"epoch": 0.25978140345319184,
"grad_norm": 0.00015198950131889433,
"learning_rate": 1.8318243209444252e-05,
"loss": 0.0103,
"step": 8200
},
{
"epoch": 0.26294946934896246,
"grad_norm": 0.006131887435913086,
"learning_rate": 1.8284894869357878e-05,
"loss": 0.0048,
"step": 8300
},
{
"epoch": 0.2661175352447331,
"grad_norm": 0.005711342208087444,
"learning_rate": 1.8251546529271507e-05,
"loss": 0.0013,
"step": 8400
},
{
"epoch": 0.2692856011405037,
"grad_norm": 0.08911605924367905,
"learning_rate": 1.8218198189185133e-05,
"loss": 0.0016,
"step": 8500
},
{
"epoch": 0.2724536670362743,
"grad_norm": 0.049607861787080765,
"learning_rate": 1.8184849849098762e-05,
"loss": 0.0071,
"step": 8600
},
{
"epoch": 0.275621732932045,
"grad_norm": 0.003345210338011384,
"learning_rate": 1.815150150901239e-05,
"loss": 0.0009,
"step": 8700
},
{
"epoch": 0.2787897988278156,
"grad_norm": 0.002960205776616931,
"learning_rate": 1.8118153168926017e-05,
"loss": 0.0033,
"step": 8800
},
{
"epoch": 0.28195786472358625,
"grad_norm": 0.007276841904968023,
"learning_rate": 1.8084804828839646e-05,
"loss": 0.0005,
"step": 8900
},
{
"epoch": 0.28512593061935687,
"grad_norm": 0.0012576623121276498,
"learning_rate": 1.8051456488753275e-05,
"loss": 0.0011,
"step": 9000
},
{
"epoch": 0.2882939965151275,
"grad_norm": 0.001386396586894989,
"learning_rate": 1.80181081486669e-05,
"loss": 0.0005,
"step": 9100
},
{
"epoch": 0.29146206241089817,
"grad_norm": 0.0024564603809267282,
"learning_rate": 1.798475980858053e-05,
"loss": 0.004,
"step": 9200
},
{
"epoch": 0.2946301283066688,
"grad_norm": 0.00010097989434143528,
"learning_rate": 1.7951411468494156e-05,
"loss": 0.001,
"step": 9300
},
{
"epoch": 0.2977981942024394,
"grad_norm": 0.0008612315286882222,
"learning_rate": 1.7918063128407785e-05,
"loss": 0.0028,
"step": 9400
},
{
"epoch": 0.30096626009821004,
"grad_norm": 0.09617114812135696,
"learning_rate": 1.7884714788321414e-05,
"loss": 0.0011,
"step": 9500
},
{
"epoch": 0.30413432599398066,
"grad_norm": 0.002843310823664069,
"learning_rate": 1.785136644823504e-05,
"loss": 0.0011,
"step": 9600
},
{
"epoch": 0.3073023918897513,
"grad_norm": 0.03222779557108879,
"learning_rate": 1.781801810814867e-05,
"loss": 0.0013,
"step": 9700
},
{
"epoch": 0.31047045778552196,
"grad_norm": 0.002599438652396202,
"learning_rate": 1.7784669768062298e-05,
"loss": 0.0009,
"step": 9800
},
{
"epoch": 0.3136385236812926,
"grad_norm": 9.065222002391238e-06,
"learning_rate": 1.7751321427975924e-05,
"loss": 0.0141,
"step": 9900
},
{
"epoch": 0.3168065895770632,
"grad_norm": 0.136834517121315,
"learning_rate": 1.7717973087889553e-05,
"loss": 0.0038,
"step": 10000
},
{
"epoch": 0.3199746554728338,
"grad_norm": 0.004138951655477285,
"learning_rate": 1.768462474780318e-05,
"loss": 0.0026,
"step": 10100
},
{
"epoch": 0.32314272136860445,
"grad_norm": 7.845780783100054e-05,
"learning_rate": 1.7651276407716808e-05,
"loss": 0.0108,
"step": 10200
},
{
"epoch": 0.3263107872643751,
"grad_norm": 0.00233863340690732,
"learning_rate": 1.7617928067630434e-05,
"loss": 0.0032,
"step": 10300
},
{
"epoch": 0.32947885316014575,
"grad_norm": 5.531415808945894e-05,
"learning_rate": 1.7584579727544063e-05,
"loss": 0.0012,
"step": 10400
},
{
"epoch": 0.33264691905591637,
"grad_norm": 0.002138437470421195,
"learning_rate": 1.755123138745769e-05,
"loss": 0.0011,
"step": 10500
},
{
"epoch": 0.335814984951687,
"grad_norm": 0.007583692204207182,
"learning_rate": 1.7517883047371318e-05,
"loss": 0.0025,
"step": 10600
},
{
"epoch": 0.3389830508474576,
"grad_norm": 0.0399901457130909,
"learning_rate": 1.7484534707284947e-05,
"loss": 0.0026,
"step": 10700
},
{
"epoch": 0.34215111674322823,
"grad_norm": 0.10378504544496536,
"learning_rate": 1.7451186367198573e-05,
"loss": 0.0027,
"step": 10800
},
{
"epoch": 0.3453191826389989,
"grad_norm": 0.002780862618237734,
"learning_rate": 1.74178380271122e-05,
"loss": 0.0006,
"step": 10900
},
{
"epoch": 0.34848724853476953,
"grad_norm": 0.3812588155269623,
"learning_rate": 1.738448968702583e-05,
"loss": 0.0041,
"step": 11000
},
{
"epoch": 0.35165531443054016,
"grad_norm": 2.2577160052605905e-05,
"learning_rate": 1.7351141346939457e-05,
"loss": 0.0012,
"step": 11100
},
{
"epoch": 0.3548233803263108,
"grad_norm": 0.0003742675471585244,
"learning_rate": 1.7317793006853086e-05,
"loss": 0.0036,
"step": 11200
},
{
"epoch": 0.3579914462220814,
"grad_norm": 0.0007130543817766011,
"learning_rate": 1.7284444666766715e-05,
"loss": 0.0009,
"step": 11300
},
{
"epoch": 0.3611595121178521,
"grad_norm": 0.022633198648691177,
"learning_rate": 1.725109632668034e-05,
"loss": 0.0013,
"step": 11400
},
{
"epoch": 0.3643275780136227,
"grad_norm": 0.0017543600406497717,
"learning_rate": 1.721774798659397e-05,
"loss": 0.0045,
"step": 11500
},
{
"epoch": 0.3674956439093933,
"grad_norm": 2.876186408684589e-05,
"learning_rate": 1.7184399646507595e-05,
"loss": 0.0009,
"step": 11600
},
{
"epoch": 0.37066370980516394,
"grad_norm": 0.02501189149916172,
"learning_rate": 1.7151051306421225e-05,
"loss": 0.0031,
"step": 11700
},
{
"epoch": 0.37383177570093457,
"grad_norm": 0.0001932688319357112,
"learning_rate": 1.7117702966334854e-05,
"loss": 0.0025,
"step": 11800
},
{
"epoch": 0.3769998415967052,
"grad_norm": 0.006397546734660864,
"learning_rate": 1.708435462624848e-05,
"loss": 0.0028,
"step": 11900
},
{
"epoch": 0.38016790749247587,
"grad_norm": 0.29939302802085876,
"learning_rate": 1.705100628616211e-05,
"loss": 0.0006,
"step": 12000
},
{
"epoch": 0.3833359733882465,
"grad_norm": 0.00018301274394616485,
"learning_rate": 1.7017657946075738e-05,
"loss": 0.0009,
"step": 12100
},
{
"epoch": 0.3865040392840171,
"grad_norm": 0.16564935445785522,
"learning_rate": 1.6984309605989363e-05,
"loss": 0.0073,
"step": 12200
},
{
"epoch": 0.38967210517978773,
"grad_norm": 0.012698143720626831,
"learning_rate": 1.6950961265902993e-05,
"loss": 0.0045,
"step": 12300
},
{
"epoch": 0.39284017107555835,
"grad_norm": 0.0014803281519562006,
"learning_rate": 1.691761292581662e-05,
"loss": 0.0003,
"step": 12400
},
{
"epoch": 0.39600823697132903,
"grad_norm": 0.009712688624858856,
"learning_rate": 1.6884264585730244e-05,
"loss": 0.0009,
"step": 12500
},
{
"epoch": 0.39917630286709965,
"grad_norm": 0.0013824573252350092,
"learning_rate": 1.6850916245643873e-05,
"loss": 0.0003,
"step": 12600
},
{
"epoch": 0.4023443687628703,
"grad_norm": 0.23537451028823853,
"learning_rate": 1.6817567905557502e-05,
"loss": 0.001,
"step": 12700
},
{
"epoch": 0.4055124346586409,
"grad_norm": 0.031864460557699203,
"learning_rate": 1.6784219565471128e-05,
"loss": 0.0042,
"step": 12800
},
{
"epoch": 0.4086805005544115,
"grad_norm": 0.004931151866912842,
"learning_rate": 1.6750871225384757e-05,
"loss": 0.0098,
"step": 12900
},
{
"epoch": 0.41184856645018214,
"grad_norm": 0.11204478144645691,
"learning_rate": 1.6717522885298386e-05,
"loss": 0.0014,
"step": 13000
},
{
"epoch": 0.4150166323459528,
"grad_norm": 0.002025209367275238,
"learning_rate": 1.6684174545212012e-05,
"loss": 0.0006,
"step": 13100
},
{
"epoch": 0.41818469824172344,
"grad_norm": 0.02806149795651436,
"learning_rate": 1.665082620512564e-05,
"loss": 0.0013,
"step": 13200
},
{
"epoch": 0.42135276413749406,
"grad_norm": 0.003418268170207739,
"learning_rate": 1.661747786503927e-05,
"loss": 0.001,
"step": 13300
},
{
"epoch": 0.4245208300332647,
"grad_norm": 0.00011387121776351705,
"learning_rate": 1.6584129524952896e-05,
"loss": 0.0012,
"step": 13400
},
{
"epoch": 0.4276888959290353,
"grad_norm": 0.008205407299101353,
"learning_rate": 1.6550781184866525e-05,
"loss": 0.001,
"step": 13500
},
{
"epoch": 0.43085696182480593,
"grad_norm": 0.06980779021978378,
"learning_rate": 1.651743284478015e-05,
"loss": 0.0011,
"step": 13600
},
{
"epoch": 0.4340250277205766,
"grad_norm": 0.027860773727297783,
"learning_rate": 1.648408450469378e-05,
"loss": 0.0015,
"step": 13700
},
{
"epoch": 0.43719309361634723,
"grad_norm": 0.0005275747971609235,
"learning_rate": 1.645073616460741e-05,
"loss": 0.0004,
"step": 13800
},
{
"epoch": 0.44036115951211785,
"grad_norm": 0.0019433089764788747,
"learning_rate": 1.6417387824521035e-05,
"loss": 0.0008,
"step": 13900
},
{
"epoch": 0.44352922540788847,
"grad_norm": 0.0017472075996920466,
"learning_rate": 1.6384039484434664e-05,
"loss": 0.001,
"step": 14000
},
{
"epoch": 0.4466972913036591,
"grad_norm": 0.00724576273933053,
"learning_rate": 1.6350691144348293e-05,
"loss": 0.0009,
"step": 14100
},
{
"epoch": 0.44986535719942977,
"grad_norm": 8.959687693277374e-05,
"learning_rate": 1.631734280426192e-05,
"loss": 0.0006,
"step": 14200
},
{
"epoch": 0.4530334230952004,
"grad_norm": 0.0013277491088956594,
"learning_rate": 1.6283994464175548e-05,
"loss": 0.0012,
"step": 14300
},
{
"epoch": 0.456201488990971,
"grad_norm": 0.0497293621301651,
"learning_rate": 1.6250646124089174e-05,
"loss": 0.0018,
"step": 14400
},
{
"epoch": 0.45936955488674164,
"grad_norm": 0.003778190817683935,
"learning_rate": 1.6217297784002803e-05,
"loss": 0.0006,
"step": 14500
},
{
"epoch": 0.46253762078251226,
"grad_norm": 0.022186335176229477,
"learning_rate": 1.6183949443916432e-05,
"loss": 0.0016,
"step": 14600
},
{
"epoch": 0.4657056866782829,
"grad_norm": 8.587163392803632e-06,
"learning_rate": 1.6150601103830058e-05,
"loss": 0.0013,
"step": 14700
},
{
"epoch": 0.46887375257405356,
"grad_norm": 0.1394946575164795,
"learning_rate": 1.6117252763743687e-05,
"loss": 0.0005,
"step": 14800
},
{
"epoch": 0.4720418184698242,
"grad_norm": 0.06866718083620071,
"learning_rate": 1.6083904423657313e-05,
"loss": 0.0003,
"step": 14900
},
{
"epoch": 0.4752098843655948,
"grad_norm": 0.0010026495438069105,
"learning_rate": 1.6050556083570942e-05,
"loss": 0.0007,
"step": 15000
},
{
"epoch": 0.4783779502613654,
"grad_norm": 0.002642757259309292,
"learning_rate": 1.6017207743484568e-05,
"loss": 0.0009,
"step": 15100
},
{
"epoch": 0.48154601615713605,
"grad_norm": 0.001733070588670671,
"learning_rate": 1.5983859403398197e-05,
"loss": 0.001,
"step": 15200
},
{
"epoch": 0.4847140820529067,
"grad_norm": 7.795493729645386e-05,
"learning_rate": 1.5950511063311826e-05,
"loss": 0.002,
"step": 15300
},
{
"epoch": 0.48788214794867735,
"grad_norm": 0.1361948698759079,
"learning_rate": 1.591716272322545e-05,
"loss": 0.0019,
"step": 15400
},
{
"epoch": 0.49105021384444797,
"grad_norm": 0.0015777755761519074,
"learning_rate": 1.588381438313908e-05,
"loss": 0.0007,
"step": 15500
},
{
"epoch": 0.4942182797402186,
"grad_norm": 0.0002617322898004204,
"learning_rate": 1.5850466043052706e-05,
"loss": 0.0006,
"step": 15600
},
{
"epoch": 0.4973863456359892,
"grad_norm": 0.0005603536847047508,
"learning_rate": 1.5817117702966336e-05,
"loss": 0.0021,
"step": 15700
},
{
"epoch": 0.5005544115317598,
"grad_norm": 0.007399669848382473,
"learning_rate": 1.5783769362879965e-05,
"loss": 0.0017,
"step": 15800
},
{
"epoch": 0.5037224774275305,
"grad_norm": 0.0003950314421672374,
"learning_rate": 1.575042102279359e-05,
"loss": 0.0024,
"step": 15900
},
{
"epoch": 0.5068905433233011,
"grad_norm": 0.00017947182641364634,
"learning_rate": 1.571707268270722e-05,
"loss": 0.0014,
"step": 16000
},
{
"epoch": 0.5100586092190718,
"grad_norm": 0.010381902568042278,
"learning_rate": 1.568372434262085e-05,
"loss": 0.0035,
"step": 16100
},
{
"epoch": 0.5132266751148424,
"grad_norm": 0.0018313485197722912,
"learning_rate": 1.5650376002534474e-05,
"loss": 0.0008,
"step": 16200
},
{
"epoch": 0.516394741010613,
"grad_norm": 8.66261325427331e-05,
"learning_rate": 1.5617027662448104e-05,
"loss": 0.0009,
"step": 16300
},
{
"epoch": 0.5195628069063837,
"grad_norm": 0.0049484893679618835,
"learning_rate": 1.5583679322361733e-05,
"loss": 0.0013,
"step": 16400
},
{
"epoch": 0.5227308728021542,
"grad_norm": 0.0376594252884388,
"learning_rate": 1.555033098227536e-05,
"loss": 0.0022,
"step": 16500
},
{
"epoch": 0.5258989386979249,
"grad_norm": 0.0036107038613408804,
"learning_rate": 1.5516982642188988e-05,
"loss": 0.0007,
"step": 16600
},
{
"epoch": 0.5290670045936956,
"grad_norm": 0.0010309889912605286,
"learning_rate": 1.5483634302102613e-05,
"loss": 0.0002,
"step": 16700
},
{
"epoch": 0.5322350704894662,
"grad_norm": 0.0025457690935581923,
"learning_rate": 1.5450285962016243e-05,
"loss": 0.0039,
"step": 16800
},
{
"epoch": 0.5354031363852368,
"grad_norm": 6.187368126120418e-05,
"learning_rate": 1.541693762192987e-05,
"loss": 0.0005,
"step": 16900
},
{
"epoch": 0.5385712022810074,
"grad_norm": 0.001785182161256671,
"learning_rate": 1.5383589281843497e-05,
"loss": 0.0011,
"step": 17000
},
{
"epoch": 0.5417392681767781,
"grad_norm": 8.733890717849135e-05,
"learning_rate": 1.5350240941757127e-05,
"loss": 0.0025,
"step": 17100
},
{
"epoch": 0.5449073340725487,
"grad_norm": 0.334231436252594,
"learning_rate": 1.5316892601670752e-05,
"loss": 0.0006,
"step": 17200
},
{
"epoch": 0.5480753999683193,
"grad_norm": 0.0005896109505556524,
"learning_rate": 1.528354426158438e-05,
"loss": 0.0011,
"step": 17300
},
{
"epoch": 0.55124346586409,
"grad_norm": 0.001895858091302216,
"learning_rate": 1.5250195921498007e-05,
"loss": 0.0004,
"step": 17400
},
{
"epoch": 0.5544115317598606,
"grad_norm": 0.0020244682673364878,
"learning_rate": 1.5216847581411636e-05,
"loss": 0.0007,
"step": 17500
},
{
"epoch": 0.5575795976556313,
"grad_norm": 0.005685487762093544,
"learning_rate": 1.5183499241325264e-05,
"loss": 0.0004,
"step": 17600
},
{
"epoch": 0.5607476635514018,
"grad_norm": 0.0007782336906529963,
"learning_rate": 1.5150150901238891e-05,
"loss": 0.0085,
"step": 17700
},
{
"epoch": 0.5639157294471725,
"grad_norm": 0.0003303899138700217,
"learning_rate": 1.5116802561152519e-05,
"loss": 0.0016,
"step": 17800
},
{
"epoch": 0.5670837953429432,
"grad_norm": 0.004449727479368448,
"learning_rate": 1.5083454221066148e-05,
"loss": 0.0009,
"step": 17900
},
{
"epoch": 0.5702518612387137,
"grad_norm": 0.002211131388321519,
"learning_rate": 1.5050105880979775e-05,
"loss": 0.0005,
"step": 18000
},
{
"epoch": 0.5734199271344844,
"grad_norm": 0.00011551743227755651,
"learning_rate": 1.5016757540893403e-05,
"loss": 0.0009,
"step": 18100
},
{
"epoch": 0.576587993030255,
"grad_norm": 2.151384796889033e-05,
"learning_rate": 1.4983409200807032e-05,
"loss": 0.0005,
"step": 18200
},
{
"epoch": 0.5797560589260257,
"grad_norm": 0.1097760945558548,
"learning_rate": 1.4950060860720659e-05,
"loss": 0.0019,
"step": 18300
},
{
"epoch": 0.5829241248217963,
"grad_norm": 0.004903141874819994,
"learning_rate": 1.4916712520634287e-05,
"loss": 0.0015,
"step": 18400
},
{
"epoch": 0.5860921907175669,
"grad_norm": 0.0018493414390832186,
"learning_rate": 1.4883364180547914e-05,
"loss": 0.0008,
"step": 18500
},
{
"epoch": 0.5892602566133376,
"grad_norm": 0.004849809221923351,
"learning_rate": 1.4850015840461543e-05,
"loss": 0.0011,
"step": 18600
},
{
"epoch": 0.5924283225091082,
"grad_norm": 0.5303362607955933,
"learning_rate": 1.481666750037517e-05,
"loss": 0.0007,
"step": 18700
},
{
"epoch": 0.5955963884048788,
"grad_norm": 0.01051903236657381,
"learning_rate": 1.4783319160288798e-05,
"loss": 0.0011,
"step": 18800
},
{
"epoch": 0.5987644543006495,
"grad_norm": 0.001327801146544516,
"learning_rate": 1.4749970820202425e-05,
"loss": 0.0005,
"step": 18900
},
{
"epoch": 0.6019325201964201,
"grad_norm": 0.002282701199874282,
"learning_rate": 1.4716622480116055e-05,
"loss": 0.0023,
"step": 19000
},
{
"epoch": 0.6051005860921908,
"grad_norm": 0.000489629979711026,
"learning_rate": 1.4683274140029682e-05,
"loss": 0.0028,
"step": 19100
},
{
"epoch": 0.6082686519879613,
"grad_norm": 0.09267735481262207,
"learning_rate": 1.464992579994331e-05,
"loss": 0.0008,
"step": 19200
},
{
"epoch": 0.611436717883732,
"grad_norm": 6.615820893784985e-05,
"learning_rate": 1.4616577459856937e-05,
"loss": 0.0007,
"step": 19300
},
{
"epoch": 0.6146047837795026,
"grad_norm": 0.01198121253401041,
"learning_rate": 1.4583229119770566e-05,
"loss": 0.0005,
"step": 19400
},
{
"epoch": 0.6177728496752732,
"grad_norm": 0.0003829908964689821,
"learning_rate": 1.4549880779684193e-05,
"loss": 0.0003,
"step": 19500
},
{
"epoch": 0.6209409155710439,
"grad_norm": 0.04535700008273125,
"learning_rate": 1.451653243959782e-05,
"loss": 0.0009,
"step": 19600
},
{
"epoch": 0.6241089814668145,
"grad_norm": 0.17491640150547028,
"learning_rate": 1.4483184099511447e-05,
"loss": 0.0024,
"step": 19700
},
{
"epoch": 0.6272770473625852,
"grad_norm": 0.0015510759549215436,
"learning_rate": 1.4449835759425074e-05,
"loss": 0.0008,
"step": 19800
},
{
"epoch": 0.6304451132583557,
"grad_norm": 0.039857182651758194,
"learning_rate": 1.4416487419338703e-05,
"loss": 0.0016,
"step": 19900
},
{
"epoch": 0.6336131791541264,
"grad_norm": 0.0024708157870918512,
"learning_rate": 1.438313907925233e-05,
"loss": 0.0007,
"step": 20000
},
{
"epoch": 0.6367812450498971,
"grad_norm": 2.6672966480255127,
"learning_rate": 1.4349790739165958e-05,
"loss": 0.0227,
"step": 20100
},
{
"epoch": 0.6399493109456676,
"grad_norm": 0.001121210865676403,
"learning_rate": 1.4316442399079587e-05,
"loss": 0.0007,
"step": 20200
},
{
"epoch": 0.6431173768414383,
"grad_norm": 0.00026244454784318805,
"learning_rate": 1.4283094058993215e-05,
"loss": 0.0008,
"step": 20300
},
{
"epoch": 0.6462854427372089,
"grad_norm": 0.008422612212598324,
"learning_rate": 1.4249745718906842e-05,
"loss": 0.0017,
"step": 20400
},
{
"epoch": 0.6494535086329796,
"grad_norm": 0.003273066831752658,
"learning_rate": 1.421639737882047e-05,
"loss": 0.0025,
"step": 20500
},
{
"epoch": 0.6526215745287502,
"grad_norm": 0.004011265933513641,
"learning_rate": 1.4183049038734099e-05,
"loss": 0.0005,
"step": 20600
},
{
"epoch": 0.6557896404245208,
"grad_norm": 2.8203889087308198e-05,
"learning_rate": 1.4149700698647726e-05,
"loss": 0.0006,
"step": 20700
},
{
"epoch": 0.6589577063202915,
"grad_norm": 0.028400663286447525,
"learning_rate": 1.4116352358561354e-05,
"loss": 0.0005,
"step": 20800
},
{
"epoch": 0.6621257722160621,
"grad_norm": 0.005530569702386856,
"learning_rate": 1.4083004018474981e-05,
"loss": 0.0007,
"step": 20900
},
{
"epoch": 0.6652938381118327,
"grad_norm": 0.0010687313042581081,
"learning_rate": 1.404965567838861e-05,
"loss": 0.0012,
"step": 21000
},
{
"epoch": 0.6684619040076034,
"grad_norm": 0.02122735045850277,
"learning_rate": 1.4016307338302238e-05,
"loss": 0.0003,
"step": 21100
},
{
"epoch": 0.671629969903374,
"grad_norm": 0.0008722911006771028,
"learning_rate": 1.3982958998215865e-05,
"loss": 0.0021,
"step": 21200
},
{
"epoch": 0.6747980357991447,
"grad_norm": 0.06774666905403137,
"learning_rate": 1.3949610658129492e-05,
"loss": 0.0007,
"step": 21300
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.000172132917214185,
"learning_rate": 1.3916262318043122e-05,
"loss": 0.0005,
"step": 21400
},
{
"epoch": 0.6811341675906859,
"grad_norm": 0.0020967440214008093,
"learning_rate": 1.3882913977956749e-05,
"loss": 0.0004,
"step": 21500
},
{
"epoch": 0.6843022334864565,
"grad_norm": 0.00023208513448480517,
"learning_rate": 1.3849565637870376e-05,
"loss": 0.0007,
"step": 21600
},
{
"epoch": 0.6874702993822271,
"grad_norm": 0.0010145423002541065,
"learning_rate": 1.3816217297784006e-05,
"loss": 0.0007,
"step": 21700
},
{
"epoch": 0.6906383652779978,
"grad_norm": 0.017040489241480827,
"learning_rate": 1.3782868957697633e-05,
"loss": 0.001,
"step": 21800
},
{
"epoch": 0.6938064311737684,
"grad_norm": 0.0001387975353281945,
"learning_rate": 1.374952061761126e-05,
"loss": 0.001,
"step": 21900
},
{
"epoch": 0.6969744970695391,
"grad_norm": 0.0011847980786114931,
"learning_rate": 1.3716172277524886e-05,
"loss": 0.0015,
"step": 22000
},
{
"epoch": 0.7001425629653096,
"grad_norm": 0.01234695129096508,
"learning_rate": 1.3682823937438514e-05,
"loss": 0.0054,
"step": 22100
},
{
"epoch": 0.7033106288610803,
"grad_norm": 0.07681901007890701,
"learning_rate": 1.3649475597352143e-05,
"loss": 0.0004,
"step": 22200
},
{
"epoch": 0.706478694756851,
"grad_norm": 0.018233176320791245,
"learning_rate": 1.361612725726577e-05,
"loss": 0.0005,
"step": 22300
},
{
"epoch": 0.7096467606526216,
"grad_norm": 0.07004215568304062,
"learning_rate": 1.3582778917179398e-05,
"loss": 0.0007,
"step": 22400
},
{
"epoch": 0.7128148265483922,
"grad_norm": 0.0069395024329423904,
"learning_rate": 1.3549430577093025e-05,
"loss": 0.0006,
"step": 22500
},
{
"epoch": 0.7159828924441628,
"grad_norm": 0.0007382028270512819,
"learning_rate": 1.3516082237006654e-05,
"loss": 0.0053,
"step": 22600
},
{
"epoch": 0.7191509583399335,
"grad_norm": 0.03294491395354271,
"learning_rate": 1.3482733896920282e-05,
"loss": 0.0005,
"step": 22700
},
{
"epoch": 0.7223190242357042,
"grad_norm": 0.22843457758426666,
"learning_rate": 1.3449385556833909e-05,
"loss": 0.001,
"step": 22800
},
{
"epoch": 0.7254870901314747,
"grad_norm": 0.0002898550301324576,
"learning_rate": 1.3416037216747537e-05,
"loss": 0.0004,
"step": 22900
},
{
"epoch": 0.7286551560272454,
"grad_norm": 0.0034780879504978657,
"learning_rate": 1.3382688876661166e-05,
"loss": 0.0016,
"step": 23000
},
{
"epoch": 0.731823221923016,
"grad_norm": 0.04466221109032631,
"learning_rate": 1.3349340536574793e-05,
"loss": 0.0043,
"step": 23100
},
{
"epoch": 0.7349912878187866,
"grad_norm": 9.875125215330627e-06,
"learning_rate": 1.331599219648842e-05,
"loss": 0.0019,
"step": 23200
},
{
"epoch": 0.7381593537145572,
"grad_norm": 0.5125452876091003,
"learning_rate": 1.3282643856402048e-05,
"loss": 0.0011,
"step": 23300
},
{
"epoch": 0.7413274196103279,
"grad_norm": 0.02045290358364582,
"learning_rate": 1.3249295516315677e-05,
"loss": 0.0004,
"step": 23400
},
{
"epoch": 0.7444954855060986,
"grad_norm": 0.04965211823582649,
"learning_rate": 1.3215947176229305e-05,
"loss": 0.0008,
"step": 23500
},
{
"epoch": 0.7476635514018691,
"grad_norm": 0.09520922601222992,
"learning_rate": 1.3182598836142932e-05,
"loss": 0.0023,
"step": 23600
},
{
"epoch": 0.7508316172976398,
"grad_norm": 4.333815923018847e-06,
"learning_rate": 1.3149250496056561e-05,
"loss": 0.0005,
"step": 23700
},
{
"epoch": 0.7539996831934104,
"grad_norm": 0.21383151412010193,
"learning_rate": 1.3115902155970189e-05,
"loss": 0.0006,
"step": 23800
},
{
"epoch": 0.757167749089181,
"grad_norm": 0.0006424040184356272,
"learning_rate": 1.3082553815883816e-05,
"loss": 0.0004,
"step": 23900
},
{
"epoch": 0.7603358149849517,
"grad_norm": 0.0008101545972749591,
"learning_rate": 1.3049205475797443e-05,
"loss": 0.0003,
"step": 24000
},
{
"epoch": 0.7635038808807223,
"grad_norm": 0.0026161724235862494,
"learning_rate": 1.3015857135711073e-05,
"loss": 0.0006,
"step": 24100
},
{
"epoch": 0.766671946776493,
"grad_norm": 9.565529762767255e-05,
"learning_rate": 1.29825087956247e-05,
"loss": 0.0009,
"step": 24200
},
{
"epoch": 0.7698400126722635,
"grad_norm": 0.026133345440030098,
"learning_rate": 1.2949160455538327e-05,
"loss": 0.0055,
"step": 24300
},
{
"epoch": 0.7730080785680342,
"grad_norm": 0.0001082066519302316,
"learning_rate": 1.2915812115451953e-05,
"loss": 0.0007,
"step": 24400
},
{
"epoch": 0.7761761444638049,
"grad_norm": 0.0007577472133561969,
"learning_rate": 1.288246377536558e-05,
"loss": 0.001,
"step": 24500
},
{
"epoch": 0.7793442103595755,
"grad_norm": 0.0003420517314225435,
"learning_rate": 1.284911543527921e-05,
"loss": 0.0014,
"step": 24600
},
{
"epoch": 0.7825122762553461,
"grad_norm": 0.006726464722305536,
"learning_rate": 1.2815767095192837e-05,
"loss": 0.0008,
"step": 24700
},
{
"epoch": 0.7856803421511167,
"grad_norm": 0.0014214670518413186,
"learning_rate": 1.2782418755106465e-05,
"loss": 0.0011,
"step": 24800
},
{
"epoch": 0.7888484080468874,
"grad_norm": 0.006449591834098101,
"learning_rate": 1.2749070415020092e-05,
"loss": 0.0026,
"step": 24900
},
{
"epoch": 0.7920164739426581,
"grad_norm": 0.0005304469959810376,
"learning_rate": 1.2715722074933721e-05,
"loss": 0.0026,
"step": 25000
},
{
"epoch": 0.7951845398384286,
"grad_norm": 0.0006021055160090327,
"learning_rate": 1.2682373734847349e-05,
"loss": 0.0008,
"step": 25100
},
{
"epoch": 0.7983526057341993,
"grad_norm": 0.0025042754132300615,
"learning_rate": 1.2649025394760976e-05,
"loss": 0.001,
"step": 25200
},
{
"epoch": 0.8015206716299699,
"grad_norm": 0.013771702535450459,
"learning_rate": 1.2615677054674605e-05,
"loss": 0.0014,
"step": 25300
},
{
"epoch": 0.8046887375257405,
"grad_norm": 0.007065094541758299,
"learning_rate": 1.2582328714588233e-05,
"loss": 0.0005,
"step": 25400
},
{
"epoch": 0.8078568034215111,
"grad_norm": 0.0009276416385546327,
"learning_rate": 1.254898037450186e-05,
"loss": 0.0018,
"step": 25500
},
{
"epoch": 0.8110248693172818,
"grad_norm": 0.026806719601154327,
"learning_rate": 1.2515632034415487e-05,
"loss": 0.0011,
"step": 25600
},
{
"epoch": 0.8141929352130525,
"grad_norm": 0.00029596476815640926,
"learning_rate": 1.2482283694329117e-05,
"loss": 0.0036,
"step": 25700
},
{
"epoch": 0.817361001108823,
"grad_norm": 0.0062362137250602245,
"learning_rate": 1.2448935354242744e-05,
"loss": 0.0009,
"step": 25800
},
{
"epoch": 0.8205290670045937,
"grad_norm": 0.005555544979870319,
"learning_rate": 1.2415587014156371e-05,
"loss": 0.0019,
"step": 25900
},
{
"epoch": 0.8236971329003643,
"grad_norm": 0.005463754292577505,
"learning_rate": 1.2382238674069999e-05,
"loss": 0.0014,
"step": 26000
},
{
"epoch": 0.826865198796135,
"grad_norm": 0.0022091898135840893,
"learning_rate": 1.2348890333983628e-05,
"loss": 0.0005,
"step": 26100
},
{
"epoch": 0.8300332646919056,
"grad_norm": 0.12676991522312164,
"learning_rate": 1.2315541993897255e-05,
"loss": 0.0014,
"step": 26200
},
{
"epoch": 0.8332013305876762,
"grad_norm": 0.0015652469592168927,
"learning_rate": 1.2282193653810883e-05,
"loss": 0.0026,
"step": 26300
},
{
"epoch": 0.8363693964834469,
"grad_norm": 0.00408145971596241,
"learning_rate": 1.224884531372451e-05,
"loss": 0.0011,
"step": 26400
},
{
"epoch": 0.8395374623792174,
"grad_norm": 0.00045841402607038617,
"learning_rate": 1.221549697363814e-05,
"loss": 0.0216,
"step": 26500
},
{
"epoch": 0.8427055282749881,
"grad_norm": 0.02451806142926216,
"learning_rate": 1.2182148633551767e-05,
"loss": 0.002,
"step": 26600
},
{
"epoch": 0.8458735941707588,
"grad_norm": 0.00027516239788383245,
"learning_rate": 1.2148800293465394e-05,
"loss": 0.001,
"step": 26700
},
{
"epoch": 0.8490416600665294,
"grad_norm": 1.4936133084120229e-05,
"learning_rate": 1.211545195337902e-05,
"loss": 0.0048,
"step": 26800
},
{
"epoch": 0.8522097259623,
"grad_norm": 0.00033001156407408416,
"learning_rate": 1.2082103613292648e-05,
"loss": 0.0083,
"step": 26900
},
{
"epoch": 0.8553777918580706,
"grad_norm": 0.017794223502278328,
"learning_rate": 1.2048755273206277e-05,
"loss": 0.0011,
"step": 27000
},
{
"epoch": 0.8585458577538413,
"grad_norm": 0.0008482965640723705,
"learning_rate": 1.2015406933119904e-05,
"loss": 0.0054,
"step": 27100
},
{
"epoch": 0.8617139236496119,
"grad_norm": 0.01870151422917843,
"learning_rate": 1.1982058593033532e-05,
"loss": 0.015,
"step": 27200
},
{
"epoch": 0.8648819895453825,
"grad_norm": 10.855655670166016,
"learning_rate": 1.194871025294716e-05,
"loss": 0.003,
"step": 27300
},
{
"epoch": 0.8680500554411532,
"grad_norm": 0.029217010363936424,
"learning_rate": 1.1915361912860788e-05,
"loss": 0.0024,
"step": 27400
},
{
"epoch": 0.8712181213369238,
"grad_norm": 0.03195062279701233,
"learning_rate": 1.1882013572774416e-05,
"loss": 0.0014,
"step": 27500
},
{
"epoch": 0.8743861872326945,
"grad_norm": 0.005607489962130785,
"learning_rate": 1.1848665232688043e-05,
"loss": 0.0029,
"step": 27600
},
{
"epoch": 0.877554253128465,
"grad_norm": 0.0009394127409905195,
"learning_rate": 1.1815316892601672e-05,
"loss": 0.0041,
"step": 27700
},
{
"epoch": 0.8807223190242357,
"grad_norm": 0.004952901508659124,
"learning_rate": 1.17819685525153e-05,
"loss": 0.0007,
"step": 27800
},
{
"epoch": 0.8838903849200064,
"grad_norm": 0.0013988588470965624,
"learning_rate": 1.1748620212428927e-05,
"loss": 0.0019,
"step": 27900
},
{
"epoch": 0.8870584508157769,
"grad_norm": 0.0029521535616368055,
"learning_rate": 1.1715271872342554e-05,
"loss": 0.001,
"step": 28000
},
{
"epoch": 0.8902265167115476,
"grad_norm": 0.0014934735372662544,
"learning_rate": 1.1681923532256184e-05,
"loss": 0.0006,
"step": 28100
},
{
"epoch": 0.8933945826073182,
"grad_norm": 0.12357547879219055,
"learning_rate": 1.1648575192169811e-05,
"loss": 0.0008,
"step": 28200
},
{
"epoch": 0.8965626485030889,
"grad_norm": 0.0003371001803316176,
"learning_rate": 1.1615226852083438e-05,
"loss": 0.001,
"step": 28300
},
{
"epoch": 0.8997307143988595,
"grad_norm": 0.01792779751121998,
"learning_rate": 1.1581878511997066e-05,
"loss": 0.0014,
"step": 28400
},
{
"epoch": 0.9028987802946301,
"grad_norm": 0.031045174226164818,
"learning_rate": 1.1548530171910695e-05,
"loss": 0.0019,
"step": 28500
},
{
"epoch": 0.9060668461904008,
"grad_norm": 0.031731363385915756,
"learning_rate": 1.1515181831824322e-05,
"loss": 0.0005,
"step": 28600
},
{
"epoch": 0.9092349120861714,
"grad_norm": 0.003199261147528887,
"learning_rate": 1.148183349173795e-05,
"loss": 0.0021,
"step": 28700
},
{
"epoch": 0.912402977981942,
"grad_norm": 4.737121344078332e-05,
"learning_rate": 1.1448485151651579e-05,
"loss": 0.0006,
"step": 28800
},
{
"epoch": 0.9155710438777127,
"grad_norm": 0.012714708223938942,
"learning_rate": 1.1415136811565206e-05,
"loss": 0.0013,
"step": 28900
},
{
"epoch": 0.9187391097734833,
"grad_norm": 0.00023968149616848677,
"learning_rate": 1.1381788471478834e-05,
"loss": 0.0013,
"step": 29000
},
{
"epoch": 0.921907175669254,
"grad_norm": 0.00018393975915387273,
"learning_rate": 1.1348440131392461e-05,
"loss": 0.003,
"step": 29100
},
{
"epoch": 0.9250752415650245,
"grad_norm": 31.17757225036621,
"learning_rate": 1.1315091791306087e-05,
"loss": 0.0073,
"step": 29200
},
{
"epoch": 0.9282433074607952,
"grad_norm": 0.7764952182769775,
"learning_rate": 1.1281743451219716e-05,
"loss": 0.0006,
"step": 29300
},
{
"epoch": 0.9314113733565658,
"grad_norm": 0.02078479342162609,
"learning_rate": 1.1248395111133344e-05,
"loss": 0.0055,
"step": 29400
},
{
"epoch": 0.9345794392523364,
"grad_norm": 0.024362290278077126,
"learning_rate": 1.1215046771046971e-05,
"loss": 0.0015,
"step": 29500
},
{
"epoch": 0.9377475051481071,
"grad_norm": 0.006054306868463755,
"learning_rate": 1.1181698430960599e-05,
"loss": 0.0006,
"step": 29600
},
{
"epoch": 0.9409155710438777,
"grad_norm": 0.0014811428263783455,
"learning_rate": 1.1148350090874228e-05,
"loss": 0.0016,
"step": 29700
},
{
"epoch": 0.9440836369396484,
"grad_norm": 0.00024473376106470823,
"learning_rate": 1.1115001750787855e-05,
"loss": 0.0048,
"step": 29800
},
{
"epoch": 0.9472517028354189,
"grad_norm": 0.9019193053245544,
"learning_rate": 1.1081653410701483e-05,
"loss": 0.0012,
"step": 29900
},
{
"epoch": 0.9504197687311896,
"grad_norm": 0.005399579647928476,
"learning_rate": 1.104830507061511e-05,
"loss": 0.0017,
"step": 30000
},
{
"epoch": 0.9535878346269603,
"grad_norm": 9.696155757410452e-05,
"learning_rate": 1.1014956730528739e-05,
"loss": 0.001,
"step": 30100
},
{
"epoch": 0.9567559005227309,
"grad_norm": 0.004988879431039095,
"learning_rate": 1.0981608390442367e-05,
"loss": 0.0009,
"step": 30200
},
{
"epoch": 0.9599239664185015,
"grad_norm": 0.0014560155104845762,
"learning_rate": 1.0948260050355994e-05,
"loss": 0.0046,
"step": 30300
},
{
"epoch": 0.9630920323142721,
"grad_norm": 0.0015756795182824135,
"learning_rate": 1.0914911710269623e-05,
"loss": 0.0019,
"step": 30400
},
{
"epoch": 0.9662600982100428,
"grad_norm": 0.0033032239880412817,
"learning_rate": 1.088156337018325e-05,
"loss": 0.0034,
"step": 30500
},
{
"epoch": 0.9694281641058135,
"grad_norm": 0.001508047222159803,
"learning_rate": 1.0848215030096878e-05,
"loss": 0.0015,
"step": 30600
},
{
"epoch": 0.972596230001584,
"grad_norm": 0.005174440797418356,
"learning_rate": 1.0814866690010505e-05,
"loss": 0.0007,
"step": 30700
},
{
"epoch": 0.9757642958973547,
"grad_norm": 0.08969740569591522,
"learning_rate": 1.0781518349924135e-05,
"loss": 0.0031,
"step": 30800
},
{
"epoch": 0.9789323617931253,
"grad_norm": 1.643660471017938e-05,
"learning_rate": 1.0748170009837762e-05,
"loss": 0.0039,
"step": 30900
},
{
"epoch": 0.9821004276888959,
"grad_norm": 0.0014628027565777302,
"learning_rate": 1.071482166975139e-05,
"loss": 0.0011,
"step": 31000
},
{
"epoch": 0.9852684935846666,
"grad_norm": 0.00014162520528770983,
"learning_rate": 1.0681473329665017e-05,
"loss": 0.0014,
"step": 31100
},
{
"epoch": 0.9884365594804372,
"grad_norm": 0.0008995328098535538,
"learning_rate": 1.0648124989578646e-05,
"loss": 0.0015,
"step": 31200
},
{
"epoch": 0.9916046253762079,
"grad_norm": 0.000977379735559225,
"learning_rate": 1.0614776649492273e-05,
"loss": 0.0018,
"step": 31300
},
{
"epoch": 0.9947726912719784,
"grad_norm": 0.0006964199710637331,
"learning_rate": 1.0581428309405901e-05,
"loss": 0.0349,
"step": 31400
},
{
"epoch": 0.9979407571677491,
"grad_norm": 0.001510333619080484,
"learning_rate": 1.0548079969319528e-05,
"loss": 0.0007,
"step": 31500
},
{
"epoch": 1.0011088230635197,
"grad_norm": 0.0027738306671380997,
"learning_rate": 1.0514731629233154e-05,
"loss": 0.0014,
"step": 31600
},
{
"epoch": 1.0042768889592903,
"grad_norm": 0.0002610177034512162,
"learning_rate": 1.0481383289146783e-05,
"loss": 0.0024,
"step": 31700
},
{
"epoch": 1.007444954855061,
"grad_norm": 0.0035766460932791233,
"learning_rate": 1.044803494906041e-05,
"loss": 0.0013,
"step": 31800
},
{
"epoch": 1.0106130207508317,
"grad_norm": 0.001029430190101266,
"learning_rate": 1.0414686608974038e-05,
"loss": 0.0084,
"step": 31900
},
{
"epoch": 1.0137810866466022,
"grad_norm": 0.0004998042713850737,
"learning_rate": 1.0381338268887665e-05,
"loss": 0.0016,
"step": 32000
},
{
"epoch": 1.0169491525423728,
"grad_norm": 0.03187868371605873,
"learning_rate": 1.0347989928801295e-05,
"loss": 0.0007,
"step": 32100
},
{
"epoch": 1.0201172184381435,
"grad_norm": 0.0006562778726220131,
"learning_rate": 1.0314641588714922e-05,
"loss": 0.0018,
"step": 32200
},
{
"epoch": 1.0232852843339142,
"grad_norm": 0.0015058261342346668,
"learning_rate": 1.028129324862855e-05,
"loss": 0.0017,
"step": 32300
},
{
"epoch": 1.0264533502296849,
"grad_norm": 0.00016952259466052055,
"learning_rate": 1.0247944908542179e-05,
"loss": 0.0017,
"step": 32400
},
{
"epoch": 1.0296214161254553,
"grad_norm": 7.879018994572107e-06,
"learning_rate": 1.0214596568455806e-05,
"loss": 0.0008,
"step": 32500
},
{
"epoch": 1.032789482021226,
"grad_norm": 0.006021997891366482,
"learning_rate": 1.0181248228369433e-05,
"loss": 0.0012,
"step": 32600
},
{
"epoch": 1.0359575479169967,
"grad_norm": 0.2344316691160202,
"learning_rate": 1.0147899888283061e-05,
"loss": 0.0028,
"step": 32700
},
{
"epoch": 1.0391256138127674,
"grad_norm": 0.002558755222707987,
"learning_rate": 1.011455154819669e-05,
"loss": 0.0004,
"step": 32800
},
{
"epoch": 1.042293679708538,
"grad_norm": 0.31538712978363037,
"learning_rate": 1.0081203208110317e-05,
"loss": 0.0007,
"step": 32900
},
{
"epoch": 1.0454617456043085,
"grad_norm": 0.00013157808280084282,
"learning_rate": 1.0047854868023945e-05,
"loss": 0.0002,
"step": 33000
},
{
"epoch": 1.0486298115000792,
"grad_norm": 0.0020028562285006046,
"learning_rate": 1.0014506527937572e-05,
"loss": 0.0013,
"step": 33100
},
{
"epoch": 1.0517978773958498,
"grad_norm": 0.266053169965744,
"learning_rate": 9.981158187851201e-06,
"loss": 0.0013,
"step": 33200
},
{
"epoch": 1.0549659432916205,
"grad_norm": 0.0006200580974109471,
"learning_rate": 9.947809847764829e-06,
"loss": 0.0009,
"step": 33300
},
{
"epoch": 1.0581340091873912,
"grad_norm": 0.0010163384722545743,
"learning_rate": 9.914461507678456e-06,
"loss": 0.0004,
"step": 33400
},
{
"epoch": 1.0613020750831617,
"grad_norm": 0.0007398608140647411,
"learning_rate": 9.881113167592084e-06,
"loss": 0.0006,
"step": 33500
},
{
"epoch": 1.0644701409789323,
"grad_norm": 5.2672654419438913e-05,
"learning_rate": 9.847764827505711e-06,
"loss": 0.0003,
"step": 33600
},
{
"epoch": 1.067638206874703,
"grad_norm": 0.0007165202987380326,
"learning_rate": 9.814416487419339e-06,
"loss": 0.0014,
"step": 33700
},
{
"epoch": 1.0708062727704737,
"grad_norm": 0.012463639490306377,
"learning_rate": 9.781068147332968e-06,
"loss": 0.0006,
"step": 33800
},
{
"epoch": 1.0739743386662441,
"grad_norm": 0.0006154962466098368,
"learning_rate": 9.747719807246595e-06,
"loss": 0.0002,
"step": 33900
},
{
"epoch": 1.0771424045620148,
"grad_norm": 0.018396640196442604,
"learning_rate": 9.714371467160223e-06,
"loss": 0.0004,
"step": 34000
},
{
"epoch": 1.0803104704577855,
"grad_norm": 0.0008099168189801276,
"learning_rate": 9.68102312707385e-06,
"loss": 0.0008,
"step": 34100
},
{
"epoch": 1.0834785363535562,
"grad_norm": 0.01304282434284687,
"learning_rate": 9.64767478698748e-06,
"loss": 0.0004,
"step": 34200
},
{
"epoch": 1.0866466022493269,
"grad_norm": 1.1287897825241089,
"learning_rate": 9.614326446901107e-06,
"loss": 0.0005,
"step": 34300
},
{
"epoch": 1.0898146681450975,
"grad_norm": 0.0009092055261135101,
"learning_rate": 9.580978106814734e-06,
"loss": 0.0003,
"step": 34400
},
{
"epoch": 1.092982734040868,
"grad_norm": 1.1557484867807943e-05,
"learning_rate": 9.547629766728362e-06,
"loss": 0.0001,
"step": 34500
},
{
"epoch": 1.0961507999366387,
"grad_norm": 0.0008222841424867511,
"learning_rate": 9.514281426641989e-06,
"loss": 0.0012,
"step": 34600
},
{
"epoch": 1.0993188658324093,
"grad_norm": 0.0022294274531304836,
"learning_rate": 9.480933086555616e-06,
"loss": 0.0058,
"step": 34700
},
{
"epoch": 1.10248693172818,
"grad_norm": 0.009762086905539036,
"learning_rate": 9.447584746469246e-06,
"loss": 0.0006,
"step": 34800
},
{
"epoch": 1.1056549976239505,
"grad_norm": 0.7700905799865723,
"learning_rate": 9.414236406382873e-06,
"loss": 0.002,
"step": 34900
},
{
"epoch": 1.1088230635197212,
"grad_norm": 0.0003761460247915238,
"learning_rate": 9.3808880662965e-06,
"loss": 0.0003,
"step": 35000
},
{
"epoch": 1.1119911294154918,
"grad_norm": 0.031011968851089478,
"learning_rate": 9.347539726210128e-06,
"loss": 0.0002,
"step": 35100
},
{
"epoch": 1.1151591953112625,
"grad_norm": 5.691965634468943e-05,
"learning_rate": 9.314191386123757e-06,
"loss": 0.0012,
"step": 35200
},
{
"epoch": 1.1183272612070332,
"grad_norm": 0.002426894148811698,
"learning_rate": 9.280843046037384e-06,
"loss": 0.0003,
"step": 35300
},
{
"epoch": 1.1214953271028036,
"grad_norm": 8.327289106091484e-05,
"learning_rate": 9.247494705951012e-06,
"loss": 0.0008,
"step": 35400
},
{
"epoch": 1.1246633929985743,
"grad_norm": 0.0014542491408064961,
"learning_rate": 9.21414636586464e-06,
"loss": 0.0003,
"step": 35500
},
{
"epoch": 1.127831458894345,
"grad_norm": 0.00029037665808573365,
"learning_rate": 9.180798025778268e-06,
"loss": 0.0006,
"step": 35600
},
{
"epoch": 1.1309995247901157,
"grad_norm": 0.00022046563390176743,
"learning_rate": 9.147449685691896e-06,
"loss": 0.0003,
"step": 35700
},
{
"epoch": 1.1341675906858864,
"grad_norm": 0.0006549390382133424,
"learning_rate": 9.114101345605523e-06,
"loss": 0.0002,
"step": 35800
},
{
"epoch": 1.1373356565816568,
"grad_norm": 7.389521488221362e-05,
"learning_rate": 9.08075300551915e-06,
"loss": 0.0004,
"step": 35900
},
{
"epoch": 1.1405037224774275,
"grad_norm": 0.03006296418607235,
"learning_rate": 9.047404665432778e-06,
"loss": 0.0004,
"step": 36000
},
{
"epoch": 1.1436717883731982,
"grad_norm": 0.0006366446614265442,
"learning_rate": 9.014056325346406e-06,
"loss": 0.0003,
"step": 36100
},
{
"epoch": 1.1468398542689688,
"grad_norm": 1.6421807231381536e-05,
"learning_rate": 8.980707985260035e-06,
"loss": 0.0011,
"step": 36200
},
{
"epoch": 1.1500079201647395,
"grad_norm": 0.0005404766998253763,
"learning_rate": 8.947359645173662e-06,
"loss": 0.0003,
"step": 36300
},
{
"epoch": 1.15317598606051,
"grad_norm": 4.2021845729323104e-05,
"learning_rate": 8.91401130508729e-06,
"loss": 0.0005,
"step": 36400
},
{
"epoch": 1.1563440519562806,
"grad_norm": 0.224160835146904,
"learning_rate": 8.880662965000919e-06,
"loss": 0.0006,
"step": 36500
},
{
"epoch": 1.1595121178520513,
"grad_norm": 0.00010083234519697726,
"learning_rate": 8.847314624914546e-06,
"loss": 0.0004,
"step": 36600
},
{
"epoch": 1.162680183747822,
"grad_norm": 0.00012005392636638135,
"learning_rate": 8.813966284828174e-06,
"loss": 0.0002,
"step": 36700
},
{
"epoch": 1.1658482496435927,
"grad_norm": 0.00014081050176173449,
"learning_rate": 8.780617944741801e-06,
"loss": 0.0002,
"step": 36800
},
{
"epoch": 1.1690163155393631,
"grad_norm": 0.0011462682159617543,
"learning_rate": 8.74726960465543e-06,
"loss": 0.0003,
"step": 36900
},
{
"epoch": 1.1721843814351338,
"grad_norm": 0.0005480795516632497,
"learning_rate": 8.713921264569056e-06,
"loss": 0.0008,
"step": 37000
},
{
"epoch": 1.1753524473309045,
"grad_norm": 0.00025665463181212544,
"learning_rate": 8.680572924482683e-06,
"loss": 0.0003,
"step": 37100
},
{
"epoch": 1.1785205132266752,
"grad_norm": 0.0019408657681196928,
"learning_rate": 8.647224584396313e-06,
"loss": 0.0004,
"step": 37200
},
{
"epoch": 1.1816885791224458,
"grad_norm": 8.986064131022431e-06,
"learning_rate": 8.61387624430994e-06,
"loss": 0.0003,
"step": 37300
},
{
"epoch": 1.1848566450182163,
"grad_norm": 0.008489036932587624,
"learning_rate": 8.580527904223567e-06,
"loss": 0.0004,
"step": 37400
},
{
"epoch": 1.188024710913987,
"grad_norm": 0.0009591535781510174,
"learning_rate": 8.547179564137197e-06,
"loss": 0.0004,
"step": 37500
},
{
"epoch": 1.1911927768097577,
"grad_norm": 0.009112311527132988,
"learning_rate": 8.513831224050824e-06,
"loss": 0.0004,
"step": 37600
},
{
"epoch": 1.1943608427055283,
"grad_norm": 0.9046971797943115,
"learning_rate": 8.480482883964451e-06,
"loss": 0.0004,
"step": 37700
},
{
"epoch": 1.1975289086012988,
"grad_norm": 0.00033651836565695703,
"learning_rate": 8.447134543878079e-06,
"loss": 0.0009,
"step": 37800
},
{
"epoch": 1.2006969744970695,
"grad_norm": 0.03974246233701706,
"learning_rate": 8.413786203791708e-06,
"loss": 0.0005,
"step": 37900
},
{
"epoch": 1.2038650403928401,
"grad_norm": 5.420077286544256e-05,
"learning_rate": 8.380437863705335e-06,
"loss": 0.0001,
"step": 38000
},
{
"epoch": 1.2070331062886108,
"grad_norm": 4.04864113079384e-06,
"learning_rate": 8.347089523618963e-06,
"loss": 0.0001,
"step": 38100
},
{
"epoch": 1.2102011721843815,
"grad_norm": 0.011167597025632858,
"learning_rate": 8.31374118353259e-06,
"loss": 0.0004,
"step": 38200
},
{
"epoch": 1.2133692380801522,
"grad_norm": 0.0032947207801043987,
"learning_rate": 8.280392843446218e-06,
"loss": 0.0003,
"step": 38300
},
{
"epoch": 1.2165373039759226,
"grad_norm": 0.003191061317920685,
"learning_rate": 8.247044503359845e-06,
"loss": 0.0004,
"step": 38400
},
{
"epoch": 1.2197053698716933,
"grad_norm": 0.0017466794233769178,
"learning_rate": 8.213696163273474e-06,
"loss": 0.0003,
"step": 38500
},
{
"epoch": 1.222873435767464,
"grad_norm": 0.000900625775102526,
"learning_rate": 8.180347823187102e-06,
"loss": 0.0025,
"step": 38600
},
{
"epoch": 1.2260415016632347,
"grad_norm": 0.015701597556471825,
"learning_rate": 8.14699948310073e-06,
"loss": 0.0013,
"step": 38700
},
{
"epoch": 1.2292095675590051,
"grad_norm": 8.967128087533638e-05,
"learning_rate": 8.113651143014357e-06,
"loss": 0.0002,
"step": 38800
},
{
"epoch": 1.2323776334547758,
"grad_norm": 0.23212437331676483,
"learning_rate": 8.080302802927986e-06,
"loss": 0.0002,
"step": 38900
},
{
"epoch": 1.2355456993505465,
"grad_norm": 0.001443861285224557,
"learning_rate": 8.046954462841613e-06,
"loss": 0.0006,
"step": 39000
},
{
"epoch": 1.2387137652463172,
"grad_norm": 0.32768702507019043,
"learning_rate": 8.01360612275524e-06,
"loss": 0.0002,
"step": 39100
},
{
"epoch": 1.2418818311420878,
"grad_norm": 0.13200968503952026,
"learning_rate": 7.980257782668868e-06,
"loss": 0.0003,
"step": 39200
},
{
"epoch": 1.2450498970378585,
"grad_norm": 0.041544314473867416,
"learning_rate": 7.946909442582497e-06,
"loss": 0.0003,
"step": 39300
},
{
"epoch": 1.248217962933629,
"grad_norm": 0.003154648235067725,
"learning_rate": 7.913561102496123e-06,
"loss": 0.0003,
"step": 39400
},
{
"epoch": 1.2513860288293996,
"grad_norm": 7.159214146668091e-05,
"learning_rate": 7.880212762409752e-06,
"loss": 0.0003,
"step": 39500
},
{
"epoch": 1.2545540947251703,
"grad_norm": 0.005242425017058849,
"learning_rate": 7.84686442232338e-06,
"loss": 0.0005,
"step": 39600
},
{
"epoch": 1.257722160620941,
"grad_norm": 0.12032686918973923,
"learning_rate": 7.813516082237007e-06,
"loss": 0.0002,
"step": 39700
},
{
"epoch": 1.2608902265167115,
"grad_norm": 3.002350786118768e-05,
"learning_rate": 7.780167742150634e-06,
"loss": 0.0003,
"step": 39800
},
{
"epoch": 1.2640582924124821,
"grad_norm": 1.667384094616864e-05,
"learning_rate": 7.746819402064264e-06,
"loss": 0.0003,
"step": 39900
},
{
"epoch": 1.2672263583082528,
"grad_norm": 0.004214556887745857,
"learning_rate": 7.713471061977891e-06,
"loss": 0.0002,
"step": 40000
},
{
"epoch": 1.2703944242040235,
"grad_norm": 0.00022098551562521607,
"learning_rate": 7.680122721891518e-06,
"loss": 0.0002,
"step": 40100
},
{
"epoch": 1.2735624900997942,
"grad_norm": 0.0006105100037530065,
"learning_rate": 7.646774381805146e-06,
"loss": 0.0003,
"step": 40200
},
{
"epoch": 1.2767305559955648,
"grad_norm": 0.0006831226055510342,
"learning_rate": 7.613426041718774e-06,
"loss": 0.0001,
"step": 40300
},
{
"epoch": 1.2798986218913353,
"grad_norm": 0.00035824175574816763,
"learning_rate": 7.580077701632402e-06,
"loss": 0.001,
"step": 40400
},
{
"epoch": 1.283066687787106,
"grad_norm": 0.0009726459975354373,
"learning_rate": 7.54672936154603e-06,
"loss": 0.0002,
"step": 40500
},
{
"epoch": 1.2862347536828767,
"grad_norm": 3.4098738979082555e-05,
"learning_rate": 7.513381021459657e-06,
"loss": 0.0003,
"step": 40600
},
{
"epoch": 1.289402819578647,
"grad_norm": 0.0016234411159530282,
"learning_rate": 7.480032681373285e-06,
"loss": 0.0004,
"step": 40700
},
{
"epoch": 1.2925708854744178,
"grad_norm": 0.019855381920933723,
"learning_rate": 7.446684341286913e-06,
"loss": 0.0004,
"step": 40800
},
{
"epoch": 1.2957389513701885,
"grad_norm": 0.010516811162233353,
"learning_rate": 7.41333600120054e-06,
"loss": 0.0002,
"step": 40900
},
{
"epoch": 1.2989070172659591,
"grad_norm": 0.1184462159872055,
"learning_rate": 7.379987661114169e-06,
"loss": 0.0006,
"step": 41000
},
{
"epoch": 1.3020750831617298,
"grad_norm": 3.537457814672962e-05,
"learning_rate": 7.346639321027796e-06,
"loss": 0.0002,
"step": 41100
},
{
"epoch": 1.3052431490575005,
"grad_norm": 0.0002039948885794729,
"learning_rate": 7.313290980941424e-06,
"loss": 0.0002,
"step": 41200
},
{
"epoch": 1.308411214953271,
"grad_norm": 0.06637877225875854,
"learning_rate": 7.279942640855052e-06,
"loss": 0.0003,
"step": 41300
},
{
"epoch": 1.3115792808490416,
"grad_norm": 0.06105900555849075,
"learning_rate": 7.24659430076868e-06,
"loss": 0.0002,
"step": 41400
},
{
"epoch": 1.3147473467448123,
"grad_norm": 0.0005946651799604297,
"learning_rate": 7.213245960682308e-06,
"loss": 0.0012,
"step": 41500
},
{
"epoch": 1.317915412640583,
"grad_norm": 0.00044926407281309366,
"learning_rate": 7.179897620595936e-06,
"loss": 0.0005,
"step": 41600
},
{
"epoch": 1.3210834785363534,
"grad_norm": 0.0034730539191514254,
"learning_rate": 7.146549280509564e-06,
"loss": 0.0022,
"step": 41700
},
{
"epoch": 1.3242515444321241,
"grad_norm": 0.00045728174154646695,
"learning_rate": 7.113200940423191e-06,
"loss": 0.0002,
"step": 41800
},
{
"epoch": 1.3274196103278948,
"grad_norm": 0.029374372214078903,
"learning_rate": 7.079852600336818e-06,
"loss": 0.0007,
"step": 41900
},
{
"epoch": 1.3305876762236655,
"grad_norm": 0.010232986882328987,
"learning_rate": 7.0465042602504465e-06,
"loss": 0.0003,
"step": 42000
},
{
"epoch": 1.3337557421194361,
"grad_norm": 0.00016134929319377989,
"learning_rate": 7.013155920164074e-06,
"loss": 0.0002,
"step": 42100
},
{
"epoch": 1.3369238080152068,
"grad_norm": 0.0003897528804372996,
"learning_rate": 6.979807580077702e-06,
"loss": 0.0002,
"step": 42200
},
{
"epoch": 1.3400918739109773,
"grad_norm": 0.0017471498576924205,
"learning_rate": 6.94645923999133e-06,
"loss": 0.0002,
"step": 42300
},
{
"epoch": 1.343259939806748,
"grad_norm": 0.017642421647906303,
"learning_rate": 6.913110899904958e-06,
"loss": 0.0001,
"step": 42400
},
{
"epoch": 1.3464280057025186,
"grad_norm": 0.0010317267151549459,
"learning_rate": 6.879762559818585e-06,
"loss": 0.0002,
"step": 42500
},
{
"epoch": 1.3495960715982893,
"grad_norm": 0.029274389147758484,
"learning_rate": 6.846414219732214e-06,
"loss": 0.0005,
"step": 42600
},
{
"epoch": 1.3527641374940598,
"grad_norm": 0.0007354663102887571,
"learning_rate": 6.813065879645842e-06,
"loss": 0.0001,
"step": 42700
},
{
"epoch": 1.3559322033898304,
"grad_norm": 0.00034641989623196423,
"learning_rate": 6.779717539559469e-06,
"loss": 0.0002,
"step": 42800
},
{
"epoch": 1.3591002692856011,
"grad_norm": 0.149564728140831,
"learning_rate": 6.746369199473098e-06,
"loss": 0.0003,
"step": 42900
},
{
"epoch": 1.3622683351813718,
"grad_norm": 0.011665324680507183,
"learning_rate": 6.713020859386724e-06,
"loss": 0.0004,
"step": 43000
},
{
"epoch": 1.3654364010771425,
"grad_norm": 0.00013306832988746464,
"learning_rate": 6.679672519300352e-06,
"loss": 0.0002,
"step": 43100
},
{
"epoch": 1.3686044669729132,
"grad_norm": 0.00031623971881344914,
"learning_rate": 6.64632417921398e-06,
"loss": 0.0004,
"step": 43200
},
{
"epoch": 1.3717725328686836,
"grad_norm": 0.003858871292322874,
"learning_rate": 6.612975839127607e-06,
"loss": 0.0008,
"step": 43300
},
{
"epoch": 1.3749405987644543,
"grad_norm": 0.00579412467777729,
"learning_rate": 6.579627499041236e-06,
"loss": 0.0002,
"step": 43400
},
{
"epoch": 1.378108664660225,
"grad_norm": 3.0335993869812228e-05,
"learning_rate": 6.546279158954864e-06,
"loss": 0.0004,
"step": 43500
},
{
"epoch": 1.3812767305559956,
"grad_norm": 0.0006304982816800475,
"learning_rate": 6.512930818868491e-06,
"loss": 0.0001,
"step": 43600
},
{
"epoch": 1.384444796451766,
"grad_norm": 0.00015773455379530787,
"learning_rate": 6.47958247878212e-06,
"loss": 0.0001,
"step": 43700
},
{
"epoch": 1.3876128623475368,
"grad_norm": 0.005809741094708443,
"learning_rate": 6.446234138695747e-06,
"loss": 0.009,
"step": 43800
},
{
"epoch": 1.3907809282433075,
"grad_norm": 0.0010448688408359885,
"learning_rate": 6.412885798609375e-06,
"loss": 0.0002,
"step": 43900
},
{
"epoch": 1.3939489941390781,
"grad_norm": 0.0030797335784882307,
"learning_rate": 6.379537458523003e-06,
"loss": 0.0002,
"step": 44000
},
{
"epoch": 1.3971170600348488,
"grad_norm": 0.00012675885227508843,
"learning_rate": 6.346189118436631e-06,
"loss": 0.0003,
"step": 44100
},
{
"epoch": 1.4002851259306195,
"grad_norm": 0.0043240697123110294,
"learning_rate": 6.312840778350258e-06,
"loss": 0.0003,
"step": 44200
},
{
"epoch": 1.40345319182639,
"grad_norm": 0.00020247649808879942,
"learning_rate": 6.279492438263885e-06,
"loss": 0.0002,
"step": 44300
},
{
"epoch": 1.4066212577221606,
"grad_norm": 0.0025763397570699453,
"learning_rate": 6.2461440981775134e-06,
"loss": 0.0004,
"step": 44400
},
{
"epoch": 1.4097893236179313,
"grad_norm": 0.00025575104518793523,
"learning_rate": 6.212795758091142e-06,
"loss": 0.0006,
"step": 44500
},
{
"epoch": 1.4129573895137018,
"grad_norm": 0.0006715962663292885,
"learning_rate": 6.179447418004769e-06,
"loss": 0.0002,
"step": 44600
},
{
"epoch": 1.4161254554094724,
"grad_norm": 0.0005802169325761497,
"learning_rate": 6.1460990779183974e-06,
"loss": 0.0004,
"step": 44700
},
{
"epoch": 1.419293521305243,
"grad_norm": 6.348552687995834e-06,
"learning_rate": 6.112750737832025e-06,
"loss": 0.0002,
"step": 44800
},
{
"epoch": 1.4224615872010138,
"grad_norm": 0.000993837951682508,
"learning_rate": 6.079402397745653e-06,
"loss": 0.0002,
"step": 44900
},
{
"epoch": 1.4256296530967845,
"grad_norm": 0.036865074187517166,
"learning_rate": 6.046054057659281e-06,
"loss": 0.0004,
"step": 45000
},
{
"epoch": 1.4287977189925551,
"grad_norm": 0.09351787716150284,
"learning_rate": 6.012705717572909e-06,
"loss": 0.0003,
"step": 45100
},
{
"epoch": 1.4319657848883256,
"grad_norm": 0.00022034939320292324,
"learning_rate": 5.979357377486536e-06,
"loss": 0.0002,
"step": 45200
},
{
"epoch": 1.4351338507840963,
"grad_norm": 0.002382364822551608,
"learning_rate": 5.946009037400165e-06,
"loss": 0.0002,
"step": 45300
},
{
"epoch": 1.438301916679867,
"grad_norm": 0.0010514174355193973,
"learning_rate": 5.912660697313791e-06,
"loss": 0.0001,
"step": 45400
},
{
"epoch": 1.4414699825756376,
"grad_norm": 0.0006282671820372343,
"learning_rate": 5.8793123572274195e-06,
"loss": 0.0002,
"step": 45500
},
{
"epoch": 1.444638048471408,
"grad_norm": 0.0001003501529339701,
"learning_rate": 5.845964017141047e-06,
"loss": 0.0002,
"step": 45600
},
{
"epoch": 1.4478061143671788,
"grad_norm": 0.0019161907257512212,
"learning_rate": 5.812615677054675e-06,
"loss": 0.0002,
"step": 45700
},
{
"epoch": 1.4509741802629494,
"grad_norm": 0.0002770457649603486,
"learning_rate": 5.779267336968303e-06,
"loss": 0.0004,
"step": 45800
},
{
"epoch": 1.4541422461587201,
"grad_norm": 0.001281541888602078,
"learning_rate": 5.745918996881931e-06,
"loss": 0.0003,
"step": 45900
},
{
"epoch": 1.4573103120544908,
"grad_norm": 0.028755199164152145,
"learning_rate": 5.712570656795558e-06,
"loss": 0.0002,
"step": 46000
},
{
"epoch": 1.4604783779502615,
"grad_norm": 0.0007570263114757836,
"learning_rate": 5.679222316709187e-06,
"loss": 0.0003,
"step": 46100
},
{
"epoch": 1.463646443846032,
"grad_norm": 0.00423109345138073,
"learning_rate": 5.645873976622814e-06,
"loss": 0.0001,
"step": 46200
},
{
"epoch": 1.4668145097418026,
"grad_norm": 5.3798950830241665e-05,
"learning_rate": 5.612525636536442e-06,
"loss": 0.0003,
"step": 46300
},
{
"epoch": 1.4699825756375733,
"grad_norm": 7.343962352024391e-05,
"learning_rate": 5.57917729645007e-06,
"loss": 0.0003,
"step": 46400
},
{
"epoch": 1.473150641533344,
"grad_norm": 0.025961237028241158,
"learning_rate": 5.545828956363698e-06,
"loss": 0.0002,
"step": 46500
},
{
"epoch": 1.4763187074291144,
"grad_norm": 0.00014132962678559124,
"learning_rate": 5.512480616277325e-06,
"loss": 0.0002,
"step": 46600
},
{
"epoch": 1.479486773324885,
"grad_norm": 0.0044896723702549934,
"learning_rate": 5.479132276190953e-06,
"loss": 0.0002,
"step": 46700
},
{
"epoch": 1.4826548392206558,
"grad_norm": 5.546275497181341e-05,
"learning_rate": 5.44578393610458e-06,
"loss": 0.0002,
"step": 46800
},
{
"epoch": 1.4858229051164265,
"grad_norm": 0.0015877331607043743,
"learning_rate": 5.412435596018209e-06,
"loss": 0.0001,
"step": 46900
},
{
"epoch": 1.4889909710121971,
"grad_norm": 8.771561260800809e-05,
"learning_rate": 5.379087255931836e-06,
"loss": 0.0001,
"step": 47000
},
{
"epoch": 1.4921590369079678,
"grad_norm": 9.971875260816887e-05,
"learning_rate": 5.345738915845464e-06,
"loss": 0.0001,
"step": 47100
},
{
"epoch": 1.4953271028037383,
"grad_norm": 6.665828550467268e-05,
"learning_rate": 5.312390575759092e-06,
"loss": 0.0001,
"step": 47200
},
{
"epoch": 1.498495168699509,
"grad_norm": 0.12465495616197586,
"learning_rate": 5.27904223567272e-06,
"loss": 0.0003,
"step": 47300
},
{
"epoch": 1.5016632345952796,
"grad_norm": 0.00016497267642989755,
"learning_rate": 5.2456938955863476e-06,
"loss": 0.0001,
"step": 47400
},
{
"epoch": 1.50483130049105,
"grad_norm": 0.000793833751231432,
"learning_rate": 5.212345555499976e-06,
"loss": 0.0001,
"step": 47500
},
{
"epoch": 1.5079993663868207,
"grad_norm": 0.004431690089404583,
"learning_rate": 5.178997215413603e-06,
"loss": 0.0001,
"step": 47600
},
{
"epoch": 1.5111674322825914,
"grad_norm": 8.146934123942629e-05,
"learning_rate": 5.1456488753272316e-06,
"loss": 0.0004,
"step": 47700
},
{
"epoch": 1.514335498178362,
"grad_norm": 0.0032630818895995617,
"learning_rate": 5.112300535240858e-06,
"loss": 0.0003,
"step": 47800
},
{
"epoch": 1.5175035640741328,
"grad_norm": 3.78349454877025e-06,
"learning_rate": 5.0789521951544864e-06,
"loss": 0.0002,
"step": 47900
},
{
"epoch": 1.5206716299699035,
"grad_norm": 7.793370605213568e-05,
"learning_rate": 5.045603855068114e-06,
"loss": 0.0002,
"step": 48000
},
{
"epoch": 1.5238396958656741,
"grad_norm": 1.0998847756127361e-05,
"learning_rate": 5.012255514981742e-06,
"loss": 0.0002,
"step": 48100
},
{
"epoch": 1.5270077617614446,
"grad_norm": 7.101731171132997e-05,
"learning_rate": 4.97890717489537e-06,
"loss": 0.0002,
"step": 48200
},
{
"epoch": 1.5301758276572153,
"grad_norm": 0.04721503704786301,
"learning_rate": 4.945558834808998e-06,
"loss": 0.0005,
"step": 48300
},
{
"epoch": 1.533343893552986,
"grad_norm": 0.04013681039214134,
"learning_rate": 4.912210494722625e-06,
"loss": 0.0002,
"step": 48400
},
{
"epoch": 1.5365119594487564,
"grad_norm": 0.15584056079387665,
"learning_rate": 4.878862154636254e-06,
"loss": 0.0002,
"step": 48500
},
{
"epoch": 1.539680025344527,
"grad_norm": 0.008085441775619984,
"learning_rate": 4.845513814549881e-06,
"loss": 0.0002,
"step": 48600
},
{
"epoch": 1.5428480912402978,
"grad_norm": 0.00025607392308302224,
"learning_rate": 4.8121654744635085e-06,
"loss": 0.0001,
"step": 48700
},
{
"epoch": 1.5460161571360684,
"grad_norm": 0.009352591820061207,
"learning_rate": 4.778817134377137e-06,
"loss": 0.0001,
"step": 48800
},
{
"epoch": 1.5491842230318391,
"grad_norm": 0.00013986592239234596,
"learning_rate": 4.745468794290764e-06,
"loss": 0.0001,
"step": 48900
},
{
"epoch": 1.5523522889276098,
"grad_norm": 0.0005525100277736783,
"learning_rate": 4.7121204542043925e-06,
"loss": 0.0002,
"step": 49000
},
{
"epoch": 1.5555203548233805,
"grad_norm": 0.002130384324118495,
"learning_rate": 4.678772114118021e-06,
"loss": 0.0003,
"step": 49100
},
{
"epoch": 1.558688420719151,
"grad_norm": 0.00036302325315773487,
"learning_rate": 4.645423774031647e-06,
"loss": 0.0002,
"step": 49200
},
{
"epoch": 1.5618564866149216,
"grad_norm": 2.546385076129809e-05,
"learning_rate": 4.612075433945276e-06,
"loss": 0.0002,
"step": 49300
},
{
"epoch": 1.565024552510692,
"grad_norm": 0.0013008471578359604,
"learning_rate": 4.578727093858903e-06,
"loss": 0.0002,
"step": 49400
},
{
"epoch": 1.5681926184064627,
"grad_norm": 0.0018331869505345821,
"learning_rate": 4.545378753772531e-06,
"loss": 0.0002,
"step": 49500
},
{
"epoch": 1.5713606843022334,
"grad_norm": 0.00046941745677031577,
"learning_rate": 4.51203041368616e-06,
"loss": 0.0001,
"step": 49600
},
{
"epoch": 1.574528750198004,
"grad_norm": 0.09666042774915695,
"learning_rate": 4.478682073599787e-06,
"loss": 0.0001,
"step": 49700
},
{
"epoch": 1.5776968160937748,
"grad_norm": 0.000714512774720788,
"learning_rate": 4.4453337335134145e-06,
"loss": 0.0002,
"step": 49800
},
{
"epoch": 1.5808648819895454,
"grad_norm": 0.001266203005798161,
"learning_rate": 4.411985393427042e-06,
"loss": 0.0002,
"step": 49900
},
{
"epoch": 1.5840329478853161,
"grad_norm": 0.00010851142724277452,
"learning_rate": 4.37863705334067e-06,
"loss": 0.0001,
"step": 50000
},
{
"epoch": 1.5872010137810868,
"grad_norm": 0.02388921193778515,
"learning_rate": 4.3452887132542985e-06,
"loss": 0.0001,
"step": 50100
},
{
"epoch": 1.5903690796768573,
"grad_norm": 0.004322574008256197,
"learning_rate": 4.311940373167926e-06,
"loss": 0.0001,
"step": 50200
},
{
"epoch": 1.593537145572628,
"grad_norm": 0.0002055590011877939,
"learning_rate": 4.278592033081554e-06,
"loss": 0.0008,
"step": 50300
},
{
"epoch": 1.5967052114683984,
"grad_norm": 0.006136850919574499,
"learning_rate": 4.245243692995181e-06,
"loss": 0.0001,
"step": 50400
},
{
"epoch": 1.599873277364169,
"grad_norm": 0.03570784255862236,
"learning_rate": 4.211895352908809e-06,
"loss": 0.0002,
"step": 50500
},
{
"epoch": 1.6030413432599397,
"grad_norm": 0.00013556861085817218,
"learning_rate": 4.178547012822437e-06,
"loss": 0.0002,
"step": 50600
},
{
"epoch": 1.6062094091557104,
"grad_norm": 0.00039382753311656415,
"learning_rate": 4.145198672736065e-06,
"loss": 0.0,
"step": 50700
},
{
"epoch": 1.609377475051481,
"grad_norm": 0.014802640303969383,
"learning_rate": 4.111850332649693e-06,
"loss": 0.0001,
"step": 50800
},
{
"epoch": 1.6125455409472518,
"grad_norm": 0.002426808699965477,
"learning_rate": 4.0785019925633206e-06,
"loss": 0.0001,
"step": 50900
},
{
"epoch": 1.6157136068430225,
"grad_norm": 0.0027719761710613966,
"learning_rate": 4.045153652476948e-06,
"loss": 0.0003,
"step": 51000
},
{
"epoch": 1.6188816727387931,
"grad_norm": 0.0003508755180519074,
"learning_rate": 4.011805312390576e-06,
"loss": 0.0007,
"step": 51100
},
{
"epoch": 1.6220497386345636,
"grad_norm": 0.00042187023791484535,
"learning_rate": 3.978456972304204e-06,
"loss": 0.0001,
"step": 51200
},
{
"epoch": 1.6252178045303343,
"grad_norm": 0.00010520713840378448,
"learning_rate": 3.945108632217832e-06,
"loss": 0.0001,
"step": 51300
},
{
"epoch": 1.6283858704261047,
"grad_norm": 2.475667861290276e-05,
"learning_rate": 3.9117602921314594e-06,
"loss": 0.0002,
"step": 51400
},
{
"epoch": 1.6315539363218754,
"grad_norm": 0.00042487168684601784,
"learning_rate": 3.878411952045088e-06,
"loss": 0.0002,
"step": 51500
},
{
"epoch": 1.634722002217646,
"grad_norm": 0.0007512226002290845,
"learning_rate": 3.845063611958715e-06,
"loss": 0.0003,
"step": 51600
},
{
"epoch": 1.6378900681134168,
"grad_norm": 0.0007686197641305625,
"learning_rate": 3.8117152718723426e-06,
"loss": 0.0002,
"step": 51700
},
{
"epoch": 1.6410581340091874,
"grad_norm": 0.006279453635215759,
"learning_rate": 3.7783669317859705e-06,
"loss": 0.0002,
"step": 51800
},
{
"epoch": 1.644226199904958,
"grad_norm": 5.683067865902558e-05,
"learning_rate": 3.7450185916995983e-06,
"loss": 0.0001,
"step": 51900
},
{
"epoch": 1.6473942658007288,
"grad_norm": 0.08361367881298065,
"learning_rate": 3.711670251613226e-06,
"loss": 0.0002,
"step": 52000
},
{
"epoch": 1.6505623316964992,
"grad_norm": 1.9708577394485474,
"learning_rate": 3.678321911526854e-06,
"loss": 0.0004,
"step": 52100
},
{
"epoch": 1.65373039759227,
"grad_norm": 0.01258891262114048,
"learning_rate": 3.6449735714404815e-06,
"loss": 0.0002,
"step": 52200
},
{
"epoch": 1.6568984634880406,
"grad_norm": 1.2053630598529708e-05,
"learning_rate": 3.6116252313541093e-06,
"loss": 0.0001,
"step": 52300
},
{
"epoch": 1.660066529383811,
"grad_norm": 0.007339359261095524,
"learning_rate": 3.578276891267737e-06,
"loss": 0.0006,
"step": 52400
},
{
"epoch": 1.6632345952795817,
"grad_norm": 7.470462151104584e-05,
"learning_rate": 3.544928551181365e-06,
"loss": 0.0003,
"step": 52500
},
{
"epoch": 1.6664026611753524,
"grad_norm": 0.001069075195118785,
"learning_rate": 3.5115802110949933e-06,
"loss": 0.0001,
"step": 52600
},
{
"epoch": 1.669570727071123,
"grad_norm": 0.0014466423308476806,
"learning_rate": 3.478231871008621e-06,
"loss": 0.0001,
"step": 52700
},
{
"epoch": 1.6727387929668938,
"grad_norm": 0.0008716689771972597,
"learning_rate": 3.4448835309222482e-06,
"loss": 0.0001,
"step": 52800
},
{
"epoch": 1.6759068588626644,
"grad_norm": 0.0006247049896046519,
"learning_rate": 3.411535190835876e-06,
"loss": 0.0002,
"step": 52900
},
{
"epoch": 1.6790749247584351,
"grad_norm": 0.0016816813731566072,
"learning_rate": 3.378186850749504e-06,
"loss": 0.0001,
"step": 53000
},
{
"epoch": 1.6822429906542056,
"grad_norm": 0.015677401795983315,
"learning_rate": 3.3448385106631322e-06,
"loss": 0.0001,
"step": 53100
},
{
"epoch": 1.6854110565499762,
"grad_norm": 0.0020264824852347374,
"learning_rate": 3.31149017057676e-06,
"loss": 0.0003,
"step": 53200
},
{
"epoch": 1.6885791224457467,
"grad_norm": 0.006521924398839474,
"learning_rate": 3.278141830490388e-06,
"loss": 0.0002,
"step": 53300
},
{
"epoch": 1.6917471883415174,
"grad_norm": 0.055716466158628464,
"learning_rate": 3.244793490404015e-06,
"loss": 0.0001,
"step": 53400
},
{
"epoch": 1.694915254237288,
"grad_norm": 0.0009364295983687043,
"learning_rate": 3.2114451503176432e-06,
"loss": 0.0002,
"step": 53500
},
{
"epoch": 1.6980833201330587,
"grad_norm": 0.026155732572078705,
"learning_rate": 3.178096810231271e-06,
"loss": 0.0001,
"step": 53600
},
{
"epoch": 1.7012513860288294,
"grad_norm": 0.0009285922278650105,
"learning_rate": 3.144748470144899e-06,
"loss": 0.0009,
"step": 53700
},
{
"epoch": 1.7044194519246,
"grad_norm": 0.00019248783064540476,
"learning_rate": 3.111400130058527e-06,
"loss": 0.0002,
"step": 53800
},
{
"epoch": 1.7075875178203708,
"grad_norm": 0.008935322985053062,
"learning_rate": 3.0780517899721547e-06,
"loss": 0.0002,
"step": 53900
},
{
"epoch": 1.7107555837161414,
"grad_norm": 3.448131974437274e-05,
"learning_rate": 3.044703449885782e-06,
"loss": 0.0002,
"step": 54000
},
{
"epoch": 1.713923649611912,
"grad_norm": 0.0037920591421425343,
"learning_rate": 3.01135510979941e-06,
"loss": 0.0001,
"step": 54100
},
{
"epoch": 1.7170917155076826,
"grad_norm": 0.0004557653737720102,
"learning_rate": 2.978006769713038e-06,
"loss": 0.0001,
"step": 54200
},
{
"epoch": 1.720259781403453,
"grad_norm": 0.001680429675616324,
"learning_rate": 2.9446584296266657e-06,
"loss": 0.0002,
"step": 54300
},
{
"epoch": 1.7234278472992237,
"grad_norm": 0.006457743234932423,
"learning_rate": 2.9113100895402936e-06,
"loss": 0.0001,
"step": 54400
},
{
"epoch": 1.7265959131949944,
"grad_norm": 0.0018418490653857589,
"learning_rate": 2.8779617494539214e-06,
"loss": 0.0002,
"step": 54500
},
{
"epoch": 1.729763979090765,
"grad_norm": 0.012908555567264557,
"learning_rate": 2.844613409367549e-06,
"loss": 0.0001,
"step": 54600
},
{
"epoch": 1.7329320449865357,
"grad_norm": 0.0021234566811472178,
"learning_rate": 2.8112650692811767e-06,
"loss": 0.0002,
"step": 54700
},
{
"epoch": 1.7361001108823064,
"grad_norm": 0.3285054862499237,
"learning_rate": 2.7779167291948046e-06,
"loss": 0.0001,
"step": 54800
},
{
"epoch": 1.739268176778077,
"grad_norm": 0.0007893216679804027,
"learning_rate": 2.7445683891084324e-06,
"loss": 0.0001,
"step": 54900
},
{
"epoch": 1.7424362426738478,
"grad_norm": 0.02494579553604126,
"learning_rate": 2.7112200490220603e-06,
"loss": 0.0002,
"step": 55000
},
{
"epoch": 1.7456043085696182,
"grad_norm": 0.0023814570158720016,
"learning_rate": 2.677871708935688e-06,
"loss": 0.0002,
"step": 55100
},
{
"epoch": 1.748772374465389,
"grad_norm": 0.0058886525221168995,
"learning_rate": 2.6445233688493156e-06,
"loss": 0.0002,
"step": 55200
},
{
"epoch": 1.7519404403611594,
"grad_norm": 4.876612001680769e-05,
"learning_rate": 2.6111750287629435e-06,
"loss": 0.0002,
"step": 55300
},
{
"epoch": 1.75510850625693,
"grad_norm": 0.010443676263093948,
"learning_rate": 2.5778266886765713e-06,
"loss": 0.0002,
"step": 55400
},
{
"epoch": 1.7582765721527007,
"grad_norm": 0.01249265018850565,
"learning_rate": 2.544478348590199e-06,
"loss": 0.0001,
"step": 55500
},
{
"epoch": 1.7614446380484714,
"grad_norm": 0.00021050056966487318,
"learning_rate": 2.511130008503827e-06,
"loss": 0.0002,
"step": 55600
},
{
"epoch": 1.764612703944242,
"grad_norm": 0.01633504591882229,
"learning_rate": 2.4777816684174545e-06,
"loss": 0.0001,
"step": 55700
},
{
"epoch": 1.7677807698400128,
"grad_norm": 3.676281266962178e-05,
"learning_rate": 2.4444333283310823e-06,
"loss": 0.0003,
"step": 55800
},
{
"epoch": 1.7709488357357834,
"grad_norm": 0.002830359386280179,
"learning_rate": 2.4110849882447106e-06,
"loss": 0.0001,
"step": 55900
},
{
"epoch": 1.7741169016315539,
"grad_norm": 0.0029975976794958115,
"learning_rate": 2.377736648158338e-06,
"loss": 0.0002,
"step": 56000
},
{
"epoch": 1.7772849675273246,
"grad_norm": 0.005194125231355429,
"learning_rate": 2.344388308071966e-06,
"loss": 0.0001,
"step": 56100
},
{
"epoch": 1.7804530334230952,
"grad_norm": 7.272951734194066e-06,
"learning_rate": 2.3110399679855938e-06,
"loss": 0.0003,
"step": 56200
},
{
"epoch": 1.7836210993188657,
"grad_norm": 0.0007229465409182012,
"learning_rate": 2.2776916278992216e-06,
"loss": 0.0002,
"step": 56300
},
{
"epoch": 1.7867891652146364,
"grad_norm": 0.0005428678123280406,
"learning_rate": 2.2443432878128495e-06,
"loss": 0.0004,
"step": 56400
},
{
"epoch": 1.789957231110407,
"grad_norm": 0.017834417521953583,
"learning_rate": 2.2109949477264774e-06,
"loss": 0.0002,
"step": 56500
},
{
"epoch": 1.7931252970061777,
"grad_norm": 0.0003519799211062491,
"learning_rate": 2.177646607640105e-06,
"loss": 0.0002,
"step": 56600
},
{
"epoch": 1.7962933629019484,
"grad_norm": 5.785848043160513e-05,
"learning_rate": 2.1442982675537327e-06,
"loss": 0.0001,
"step": 56700
},
{
"epoch": 1.799461428797719,
"grad_norm": 0.00827944464981556,
"learning_rate": 2.1109499274673605e-06,
"loss": 0.0001,
"step": 56800
},
{
"epoch": 1.8026294946934898,
"grad_norm": 8.96235360414721e-06,
"learning_rate": 2.0776015873809884e-06,
"loss": 0.0001,
"step": 56900
},
{
"epoch": 1.8057975605892602,
"grad_norm": 8.50809519761242e-05,
"learning_rate": 2.0442532472946162e-06,
"loss": 0.0002,
"step": 57000
},
{
"epoch": 1.808965626485031,
"grad_norm": 0.0564473532140255,
"learning_rate": 2.010904907208244e-06,
"loss": 0.0002,
"step": 57100
},
{
"epoch": 1.8121336923808016,
"grad_norm": 0.0019106407416984439,
"learning_rate": 1.9775565671218715e-06,
"loss": 0.0002,
"step": 57200
},
{
"epoch": 1.815301758276572,
"grad_norm": 0.0011765076778829098,
"learning_rate": 1.9442082270354994e-06,
"loss": 0.0002,
"step": 57300
},
{
"epoch": 1.8184698241723427,
"grad_norm": 0.002034899080172181,
"learning_rate": 1.9108598869491273e-06,
"loss": 0.0001,
"step": 57400
},
{
"epoch": 1.8216378900681134,
"grad_norm": 2.886955189751461e-05,
"learning_rate": 1.877511546862755e-06,
"loss": 0.0003,
"step": 57500
},
{
"epoch": 1.824805955963884,
"grad_norm": 0.0003313591005280614,
"learning_rate": 1.844163206776383e-06,
"loss": 0.0001,
"step": 57600
},
{
"epoch": 1.8279740218596547,
"grad_norm": 0.0030703512020409107,
"learning_rate": 1.8108148666900108e-06,
"loss": 0.0001,
"step": 57700
},
{
"epoch": 1.8311420877554254,
"grad_norm": 0.05901242792606354,
"learning_rate": 1.7774665266036383e-06,
"loss": 0.0002,
"step": 57800
},
{
"epoch": 1.834310153651196,
"grad_norm": 0.00020717663574032485,
"learning_rate": 1.7441181865172664e-06,
"loss": 0.0002,
"step": 57900
},
{
"epoch": 1.8374782195469666,
"grad_norm": 3.810102498391643e-05,
"learning_rate": 1.7107698464308942e-06,
"loss": 0.0007,
"step": 58000
},
{
"epoch": 1.8406462854427372,
"grad_norm": 0.00012426413013599813,
"learning_rate": 1.6774215063445219e-06,
"loss": 0.0001,
"step": 58100
},
{
"epoch": 1.8438143513385077,
"grad_norm": 0.00743386335670948,
"learning_rate": 1.6440731662581497e-06,
"loss": 0.0001,
"step": 58200
},
{
"epoch": 1.8469824172342784,
"grad_norm": 0.04067447409033775,
"learning_rate": 1.6107248261717776e-06,
"loss": 0.0001,
"step": 58300
},
{
"epoch": 1.850150483130049,
"grad_norm": 2.0743360437336378e-05,
"learning_rate": 1.5773764860854052e-06,
"loss": 0.0001,
"step": 58400
},
{
"epoch": 1.8533185490258197,
"grad_norm": 0.006036572623997927,
"learning_rate": 1.544028145999033e-06,
"loss": 0.0001,
"step": 58500
},
{
"epoch": 1.8564866149215904,
"grad_norm": 0.0008050315082073212,
"learning_rate": 1.510679805912661e-06,
"loss": 0.0001,
"step": 58600
}
],
"logging_steps": 100,
"max_steps": 63130,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}