{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8564866149215904,
  "eval_steps": 500,
  "global_step": 58600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003168065895770632,
      "grad_norm": 0.0008460358949378133,
      "learning_rate": 6.335128286347799e-07,
      "loss": 0.0007,
      "step": 100
    },
    {
      "epoch": 0.006336131791541264,
      "grad_norm": 0.006260558497160673,
      "learning_rate": 1.2670256572695599e-06,
      "loss": 0.0004,
      "step": 200
    },
    {
      "epoch": 0.009504197687311896,
      "grad_norm": 0.0002755998866632581,
      "learning_rate": 1.9005384859043396e-06,
      "loss": 0.0027,
      "step": 300
    },
    {
      "epoch": 0.012672263583082528,
      "grad_norm": 0.004525843542069197,
      "learning_rate": 2.5340513145391198e-06,
      "loss": 0.003,
      "step": 400
    },
    {
      "epoch": 0.01584032947885316,
      "grad_norm": 0.009563793428242207,
      "learning_rate": 3.1675641431738997e-06,
      "loss": 0.0002,
      "step": 500
    },
    {
      "epoch": 0.01900839537462379,
      "grad_norm": 2.0679131921497174e-05,
      "learning_rate": 3.801076971808679e-06,
      "loss": 0.0025,
      "step": 600
    },
    {
      "epoch": 0.022176461270394424,
      "grad_norm": 3.369219848536886e-05,
      "learning_rate": 4.434589800443459e-06,
      "loss": 0.0007,
      "step": 700
    },
    {
      "epoch": 0.025344527166165056,
      "grad_norm": 0.03423422574996948,
      "learning_rate": 5.0681026290782395e-06,
      "loss": 0.0028,
      "step": 800
    },
    {
      "epoch": 0.02851259306193569,
      "grad_norm": 0.028383228927850723,
      "learning_rate": 5.701615457713019e-06,
      "loss": 0.001,
      "step": 900
    },
    {
      "epoch": 0.03168065895770632,
      "grad_norm": 8.241041359724477e-05,
      "learning_rate": 6.335128286347799e-06,
      "loss": 0.001,
      "step": 1000
    },
    {
      "epoch": 0.03484872485347695,
      "grad_norm": 0.015536214224994183,
      "learning_rate": 6.96864111498258e-06,
      "loss": 0.0024,
      "step": 1100
    },
    {
      "epoch": 0.03801679074924758,
      "grad_norm": 0.0755501538515091,
      "learning_rate": 7.602153943617358e-06,
      "loss": 0.0006,
      "step": 1200
    },
    {
      "epoch": 0.04118485664501822,
      "grad_norm": 0.011010600253939629,
      "learning_rate": 8.235666772252139e-06,
      "loss": 0.0056,
      "step": 1300
    },
    {
      "epoch": 0.04435292254078885,
      "grad_norm": 4.7160243411781266e-05,
      "learning_rate": 8.869179600886918e-06,
      "loss": 0.0037,
      "step": 1400
    },
    {
      "epoch": 0.04752098843655948,
      "grad_norm": 0.04266763851046562,
      "learning_rate": 9.502692429521698e-06,
      "loss": 0.0026,
      "step": 1500
    },
    {
      "epoch": 0.05068905433233011,
      "grad_norm": 0.03944120928645134,
      "learning_rate": 1.0136205258156479e-05,
      "loss": 0.0012,
      "step": 1600
    },
    {
      "epoch": 0.05385712022810074,
      "grad_norm": 0.0018407816532999277,
      "learning_rate": 1.0769718086791259e-05,
      "loss": 0.0005,
      "step": 1700
    },
    {
      "epoch": 0.05702518612387138,
      "grad_norm": 0.0038744837511330843,
      "learning_rate": 1.1403230915426038e-05,
      "loss": 0.0003,
      "step": 1800
    },
    {
      "epoch": 0.060193252019642006,
      "grad_norm": 0.002657032571732998,
      "learning_rate": 1.203674374406082e-05,
      "loss": 0.0006,
      "step": 1900
    },
    {
      "epoch": 0.06336131791541263,
      "grad_norm": 0.001354015665128827,
      "learning_rate": 1.2670256572695599e-05,
      "loss": 0.0004,
      "step": 2000
    },
    {
      "epoch": 0.06652938381118327,
      "grad_norm": 0.03860320523381233,
      "learning_rate": 1.3303769401330378e-05,
      "loss": 0.0034,
      "step": 2100
    },
    {
      "epoch": 0.0696974497069539,
      "grad_norm": 0.004643497057259083,
      "learning_rate": 1.393728222996516e-05,
      "loss": 0.0015,
      "step": 2200
    },
    {
      "epoch": 0.07286551560272454,
      "grad_norm": 0.0005492149502970278,
      "learning_rate": 1.4570795058599937e-05,
      "loss": 0.0011,
      "step": 2300
    },
    {
      "epoch": 0.07603358149849516,
      "grad_norm": 0.005505191162228584,
      "learning_rate": 1.5204307887234717e-05,
      "loss": 0.0006,
      "step": 2400
    },
    {
      "epoch": 0.0792016473942658,
      "grad_norm": 0.00082977837882936,
      "learning_rate": 1.58378207158695e-05,
      "loss": 0.0015,
      "step": 2500
    },
    {
      "epoch": 0.08236971329003644,
      "grad_norm": 6.766305159544572e-05,
      "learning_rate": 1.6471333544504278e-05,
      "loss": 0.0025,
      "step": 2600
    },
    {
      "epoch": 0.08553777918580706,
      "grad_norm": 0.06128118559718132,
      "learning_rate": 1.7104846373139055e-05,
      "loss": 0.0006,
      "step": 2700
    },
    {
      "epoch": 0.0887058450815777,
      "grad_norm": 0.43053677678108215,
      "learning_rate": 1.7738359201773837e-05,
      "loss": 0.0007,
      "step": 2800
    },
    {
      "epoch": 0.09187391097734833,
      "grad_norm": 0.00012383765715640038,
      "learning_rate": 1.8371872030408618e-05,
      "loss": 0.0011,
      "step": 2900
    },
    {
      "epoch": 0.09504197687311897,
      "grad_norm": 3.1947878596838564e-05,
      "learning_rate": 1.9005384859043396e-05,
      "loss": 0.0009,
      "step": 3000
    },
    {
      "epoch": 0.09821004276888959,
      "grad_norm": 0.0004990168381482363,
      "learning_rate": 1.9638897687678177e-05,
      "loss": 0.0019,
      "step": 3100
    },
    {
      "epoch": 0.10137810866466022,
      "grad_norm": 0.003429220989346504,
      "learning_rate": 1.998566021376286e-05,
      "loss": 0.0036,
      "step": 3200
    },
    {
      "epoch": 0.10454617456043086,
      "grad_norm": 0.00011575384996831417,
      "learning_rate": 1.995231187367649e-05,
      "loss": 0.0047,
      "step": 3300
    },
    {
      "epoch": 0.10771424045620148,
      "grad_norm": 0.001659040222875774,
      "learning_rate": 1.9918963533590115e-05,
      "loss": 0.0044,
      "step": 3400
    },
    {
      "epoch": 0.11088230635197212,
      "grad_norm": 0.00041119763045571744,
      "learning_rate": 1.9885615193503744e-05,
      "loss": 0.0018,
      "step": 3500
    },
    {
      "epoch": 0.11405037224774275,
      "grad_norm": 0.003299827454611659,
      "learning_rate": 1.9852266853417373e-05,
      "loss": 0.001,
      "step": 3600
    },
    {
      "epoch": 0.11721843814351339,
      "grad_norm": 8.044855348998681e-05,
      "learning_rate": 1.9818918513331e-05,
      "loss": 0.0027,
      "step": 3700
    },
    {
      "epoch": 0.12038650403928401,
      "grad_norm": 0.00031627726275473833,
      "learning_rate": 1.9785570173244628e-05,
      "loss": 0.0061,
      "step": 3800
    },
    {
      "epoch": 0.12355456993505465,
      "grad_norm": 0.001032638712786138,
      "learning_rate": 1.9752221833158257e-05,
      "loss": 0.001,
      "step": 3900
    },
    {
      "epoch": 0.12672263583082527,
      "grad_norm": 0.0009450612124055624,
      "learning_rate": 1.9718873493071883e-05,
      "loss": 0.0074,
      "step": 4000
    },
    {
      "epoch": 0.12989070172659592,
      "grad_norm": 0.6000776290893555,
      "learning_rate": 1.9685525152985512e-05,
      "loss": 0.0061,
      "step": 4100
    },
    {
      "epoch": 0.13305876762236654,
      "grad_norm": 0.1603873372077942,
      "learning_rate": 1.965217681289914e-05,
      "loss": 0.0015,
      "step": 4200
    },
    {
      "epoch": 0.13622683351813716,
      "grad_norm": 0.0001428043033229187,
      "learning_rate": 1.9618828472812767e-05,
      "loss": 0.0024,
      "step": 4300
    },
    {
      "epoch": 0.1393948994139078,
      "grad_norm": 0.042562585324048996,
      "learning_rate": 1.9585480132726396e-05,
      "loss": 0.0051,
      "step": 4400
    },
    {
      "epoch": 0.14256296530967844,
      "grad_norm": 0.2654358744621277,
      "learning_rate": 1.9552131792640022e-05,
      "loss": 0.0036,
      "step": 4500
    },
    {
      "epoch": 0.14573103120544909,
      "grad_norm": 0.0015032750088721514,
      "learning_rate": 1.951878345255365e-05,
      "loss": 0.0004,
      "step": 4600
    },
    {
      "epoch": 0.1488990971012197,
      "grad_norm": 9.152581333182752e-05,
      "learning_rate": 1.948543511246728e-05,
      "loss": 0.0059,
      "step": 4700
    },
    {
      "epoch": 0.15206716299699033,
      "grad_norm": 0.013867395929992199,
      "learning_rate": 1.9452086772380906e-05,
      "loss": 0.0009,
      "step": 4800
    },
    {
      "epoch": 0.15523522889276098,
      "grad_norm": 0.018314680084586143,
      "learning_rate": 1.9418738432294535e-05,
      "loss": 0.001,
      "step": 4900
    },
    {
      "epoch": 0.1584032947885316,
      "grad_norm": 0.17640839517116547,
      "learning_rate": 1.9385390092208164e-05,
      "loss": 0.0008,
      "step": 5000
    },
    {
      "epoch": 0.16157136068430222,
      "grad_norm": 0.051024582237005234,
      "learning_rate": 1.935204175212179e-05,
      "loss": 0.0011,
      "step": 5100
    },
    {
      "epoch": 0.16473942658007287,
      "grad_norm": 0.003037165617570281,
      "learning_rate": 1.931869341203542e-05,
      "loss": 0.0009,
      "step": 5200
    },
    {
      "epoch": 0.1679074924758435,
      "grad_norm": 0.011393848806619644,
      "learning_rate": 1.9285345071949045e-05,
      "loss": 0.0087,
      "step": 5300
    },
    {
      "epoch": 0.17107555837161412,
      "grad_norm": 0.0004570337769109756,
      "learning_rate": 1.925199673186267e-05,
      "loss": 0.0018,
      "step": 5400
    },
    {
      "epoch": 0.17424362426738477,
      "grad_norm": 4.8830272135091946e-05,
      "learning_rate": 1.92186483917763e-05,
      "loss": 0.0071,
      "step": 5500
    },
    {
      "epoch": 0.1774116901631554,
      "grad_norm": 0.3802063763141632,
      "learning_rate": 1.918530005168993e-05,
      "loss": 0.0021,
      "step": 5600
    },
    {
      "epoch": 0.18057975605892604,
      "grad_norm": 0.067596435546875,
      "learning_rate": 1.9151951711603555e-05,
      "loss": 0.0009,
      "step": 5700
    },
    {
      "epoch": 0.18374782195469666,
      "grad_norm": 0.0005965412128716707,
      "learning_rate": 1.9118603371517184e-05,
      "loss": 0.001,
      "step": 5800
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 0.13070069253444672,
      "learning_rate": 1.9085255031430813e-05,
      "loss": 0.0005,
      "step": 5900
    },
    {
      "epoch": 0.19008395374623793,
      "grad_norm": 0.020468149334192276,
      "learning_rate": 1.905190669134444e-05,
      "loss": 0.006,
      "step": 6000
    },
    {
      "epoch": 0.19325201964200855,
      "grad_norm": 0.026921125128865242,
      "learning_rate": 1.9018558351258068e-05,
      "loss": 0.004,
      "step": 6100
    },
    {
      "epoch": 0.19642008553777918,
      "grad_norm": 0.025172384455800056,
      "learning_rate": 1.8985210011171697e-05,
      "loss": 0.0007,
      "step": 6200
    },
    {
      "epoch": 0.19958815143354983,
      "grad_norm": 0.0012900714064016938,
      "learning_rate": 1.8951861671085323e-05,
      "loss": 0.0005,
      "step": 6300
    },
    {
      "epoch": 0.20275621732932045,
      "grad_norm": 0.007845859974622726,
      "learning_rate": 1.8918513330998952e-05,
      "loss": 0.0012,
      "step": 6400
    },
    {
      "epoch": 0.20592428322509107,
      "grad_norm": 0.22305609285831451,
      "learning_rate": 1.8885164990912578e-05,
      "loss": 0.0008,
      "step": 6500
    },
    {
      "epoch": 0.20909234912086172,
      "grad_norm": 0.0017252659890800714,
      "learning_rate": 1.8851816650826207e-05,
      "loss": 0.0151,
      "step": 6600
    },
    {
      "epoch": 0.21226041501663234,
      "grad_norm": 0.03503908962011337,
      "learning_rate": 1.8818468310739836e-05,
      "loss": 0.0007,
      "step": 6700
    },
    {
      "epoch": 0.21542848091240296,
      "grad_norm": 0.00017412351735401899,
      "learning_rate": 1.878511997065346e-05,
      "loss": 0.001,
      "step": 6800
    },
    {
      "epoch": 0.21859654680817361,
      "grad_norm": 0.25898587703704834,
      "learning_rate": 1.875177163056709e-05,
      "loss": 0.001,
      "step": 6900
    },
    {
      "epoch": 0.22176461270394424,
      "grad_norm": 0.00022779431310482323,
      "learning_rate": 1.871842329048072e-05,
      "loss": 0.0022,
      "step": 7000
    },
    {
      "epoch": 0.22493267859971489,
      "grad_norm": 0.0009634292218834162,
      "learning_rate": 1.8685074950394346e-05,
      "loss": 0.0017,
      "step": 7100
    },
    {
      "epoch": 0.2281007444954855,
      "grad_norm": 0.0021817036904394627,
      "learning_rate": 1.8651726610307975e-05,
      "loss": 0.0026,
      "step": 7200
    },
    {
      "epoch": 0.23126881039125613,
      "grad_norm": 0.01804823987185955,
      "learning_rate": 1.86183782702216e-05,
      "loss": 0.0012,
      "step": 7300
    },
    {
      "epoch": 0.23443687628702678,
      "grad_norm": 0.022183051332831383,
      "learning_rate": 1.858502993013523e-05,
      "loss": 0.0029,
      "step": 7400
    },
    {
      "epoch": 0.2376049421827974,
      "grad_norm": 0.0007926349644549191,
      "learning_rate": 1.855168159004886e-05,
      "loss": 0.0006,
      "step": 7500
    },
    {
      "epoch": 0.24077300807856802,
      "grad_norm": 0.0001486311521148309,
      "learning_rate": 1.8518333249962484e-05,
      "loss": 0.0019,
      "step": 7600
    },
    {
      "epoch": 0.24394107397433867,
      "grad_norm": 0.11593101173639297,
      "learning_rate": 1.848498490987611e-05,
      "loss": 0.0049,
      "step": 7700
    },
    {
      "epoch": 0.2471091398701093,
      "grad_norm": 0.06385669112205505,
      "learning_rate": 1.845163656978974e-05,
      "loss": 0.0018,
      "step": 7800
    },
    {
      "epoch": 0.2502772057658799,
      "grad_norm": 0.001922784373164177,
      "learning_rate": 1.841828822970337e-05,
      "loss": 0.0045,
      "step": 7900
    },
    {
      "epoch": 0.25344527166165054,
      "grad_norm": 0.0033132501412183046,
      "learning_rate": 1.8384939889616994e-05,
      "loss": 0.002,
      "step": 8000
    },
    {
      "epoch": 0.2566133375574212,
      "grad_norm": 0.0015832999488338828,
      "learning_rate": 1.8351591549530623e-05,
      "loss": 0.0009,
      "step": 8100
    },
    {
      "epoch": 0.25978140345319184,
      "grad_norm": 0.00015198950131889433,
      "learning_rate": 1.8318243209444252e-05,
      "loss": 0.0103,
      "step": 8200
    },
    {
      "epoch": 0.26294946934896246,
      "grad_norm": 0.006131887435913086,
      "learning_rate": 1.8284894869357878e-05,
      "loss": 0.0048,
      "step": 8300
    },
    {
      "epoch": 0.2661175352447331,
      "grad_norm": 0.005711342208087444,
      "learning_rate": 1.8251546529271507e-05,
      "loss": 0.0013,
      "step": 8400
    },
    {
      "epoch": 0.2692856011405037,
      "grad_norm": 0.08911605924367905,
      "learning_rate": 1.8218198189185133e-05,
      "loss": 0.0016,
      "step": 8500
    },
    {
      "epoch": 0.2724536670362743,
      "grad_norm": 0.049607861787080765,
      "learning_rate": 1.8184849849098762e-05,
      "loss": 0.0071,
      "step": 8600
    },
    {
      "epoch": 0.275621732932045,
      "grad_norm": 0.003345210338011384,
      "learning_rate": 1.815150150901239e-05,
      "loss": 0.0009,
      "step": 8700
    },
    {
      "epoch": 0.2787897988278156,
      "grad_norm": 0.002960205776616931,
      "learning_rate": 1.8118153168926017e-05,
      "loss": 0.0033,
      "step": 8800
    },
    {
      "epoch": 0.28195786472358625,
      "grad_norm": 0.007276841904968023,
      "learning_rate": 1.8084804828839646e-05,
      "loss": 0.0005,
      "step": 8900
    },
    {
      "epoch": 0.28512593061935687,
      "grad_norm": 0.0012576623121276498,
      "learning_rate": 1.8051456488753275e-05,
      "loss": 0.0011,
      "step": 9000
    },
    {
      "epoch": 0.2882939965151275,
      "grad_norm": 0.001386396586894989,
      "learning_rate": 1.80181081486669e-05,
      "loss": 0.0005,
      "step": 9100
    },
    {
      "epoch": 0.29146206241089817,
      "grad_norm": 0.0024564603809267282,
      "learning_rate": 1.798475980858053e-05,
      "loss": 0.004,
      "step": 9200
    },
    {
      "epoch": 0.2946301283066688,
      "grad_norm": 0.00010097989434143528,
      "learning_rate": 1.7951411468494156e-05,
      "loss": 0.001,
      "step": 9300
    },
    {
      "epoch": 0.2977981942024394,
      "grad_norm": 0.0008612315286882222,
      "learning_rate": 1.7918063128407785e-05,
      "loss": 0.0028,
      "step": 9400
    },
    {
      "epoch": 0.30096626009821004,
      "grad_norm": 0.09617114812135696,
      "learning_rate": 1.7884714788321414e-05,
      "loss": 0.0011,
      "step": 9500
    },
    {
      "epoch": 0.30413432599398066,
      "grad_norm": 0.002843310823664069,
      "learning_rate": 1.785136644823504e-05,
      "loss": 0.0011,
      "step": 9600
    },
    {
      "epoch": 0.3073023918897513,
      "grad_norm": 0.03222779557108879,
      "learning_rate": 1.781801810814867e-05,
      "loss": 0.0013,
      "step": 9700
    },
    {
      "epoch": 0.31047045778552196,
      "grad_norm": 0.002599438652396202,
      "learning_rate": 1.7784669768062298e-05,
      "loss": 0.0009,
      "step": 9800
    },
    {
      "epoch": 0.3136385236812926,
      "grad_norm": 9.065222002391238e-06,
      "learning_rate": 1.7751321427975924e-05,
      "loss": 0.0141,
      "step": 9900
    },
    {
      "epoch": 0.3168065895770632,
      "grad_norm": 0.136834517121315,
      "learning_rate": 1.7717973087889553e-05,
      "loss": 0.0038,
      "step": 10000
    },
    {
      "epoch": 0.3199746554728338,
      "grad_norm": 0.004138951655477285,
      "learning_rate": 1.768462474780318e-05,
      "loss": 0.0026,
      "step": 10100
    },
    {
      "epoch": 0.32314272136860445,
      "grad_norm": 7.845780783100054e-05,
      "learning_rate": 1.7651276407716808e-05,
      "loss": 0.0108,
      "step": 10200
    },
    {
      "epoch": 0.3263107872643751,
      "grad_norm": 0.00233863340690732,
      "learning_rate": 1.7617928067630434e-05,
      "loss": 0.0032,
      "step": 10300
    },
    {
      "epoch": 0.32947885316014575,
      "grad_norm": 5.531415808945894e-05,
      "learning_rate": 1.7584579727544063e-05,
      "loss": 0.0012,
      "step": 10400
    },
    {
      "epoch": 0.33264691905591637,
      "grad_norm": 0.002138437470421195,
      "learning_rate": 1.755123138745769e-05,
      "loss": 0.0011,
      "step": 10500
    },
    {
      "epoch": 0.335814984951687,
      "grad_norm": 0.007583692204207182,
      "learning_rate": 1.7517883047371318e-05,
      "loss": 0.0025,
      "step": 10600
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 0.0399901457130909,
      "learning_rate": 1.7484534707284947e-05,
      "loss": 0.0026,
      "step": 10700
    },
    {
      "epoch": 0.34215111674322823,
      "grad_norm": 0.10378504544496536,
      "learning_rate": 1.7451186367198573e-05,
      "loss": 0.0027,
      "step": 10800
    },
    {
      "epoch": 0.3453191826389989,
      "grad_norm": 0.002780862618237734,
      "learning_rate": 1.74178380271122e-05,
      "loss": 0.0006,
      "step": 10900
    },
    {
      "epoch": 0.34848724853476953,
      "grad_norm": 0.3812588155269623,
      "learning_rate": 1.738448968702583e-05,
      "loss": 0.0041,
      "step": 11000
    },
    {
      "epoch": 0.35165531443054016,
      "grad_norm": 2.2577160052605905e-05,
      "learning_rate": 1.7351141346939457e-05,
      "loss": 0.0012,
      "step": 11100
    },
    {
      "epoch": 0.3548233803263108,
      "grad_norm": 0.0003742675471585244,
      "learning_rate": 1.7317793006853086e-05,
      "loss": 0.0036,
      "step": 11200
    },
    {
      "epoch": 0.3579914462220814,
      "grad_norm": 0.0007130543817766011,
      "learning_rate": 1.7284444666766715e-05,
      "loss": 0.0009,
      "step": 11300
    },
    {
      "epoch": 0.3611595121178521,
      "grad_norm": 0.022633198648691177,
      "learning_rate": 1.725109632668034e-05,
      "loss": 0.0013,
      "step": 11400
    },
    {
      "epoch": 0.3643275780136227,
      "grad_norm": 0.0017543600406497717,
      "learning_rate": 1.721774798659397e-05,
      "loss": 0.0045,
      "step": 11500
    },
    {
      "epoch": 0.3674956439093933,
      "grad_norm": 2.876186408684589e-05,
      "learning_rate": 1.7184399646507595e-05,
      "loss": 0.0009,
      "step": 11600
    },
    {
      "epoch": 0.37066370980516394,
      "grad_norm": 0.02501189149916172,
      "learning_rate": 1.7151051306421225e-05,
      "loss": 0.0031,
      "step": 11700
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 0.0001932688319357112,
      "learning_rate": 1.7117702966334854e-05,
      "loss": 0.0025,
      "step": 11800
    },
    {
      "epoch": 0.3769998415967052,
      "grad_norm": 0.006397546734660864,
      "learning_rate": 1.708435462624848e-05,
      "loss": 0.0028,
      "step": 11900
    },
    {
      "epoch": 0.38016790749247587,
      "grad_norm": 0.29939302802085876,
      "learning_rate": 1.705100628616211e-05,
      "loss": 0.0006,
      "step": 12000
    },
    {
      "epoch": 0.3833359733882465,
      "grad_norm": 0.00018301274394616485,
      "learning_rate": 1.7017657946075738e-05,
      "loss": 0.0009,
      "step": 12100
    },
    {
      "epoch": 0.3865040392840171,
      "grad_norm": 0.16564935445785522,
      "learning_rate": 1.6984309605989363e-05,
      "loss": 0.0073,
      "step": 12200
    },
    {
      "epoch": 0.38967210517978773,
      "grad_norm": 0.012698143720626831,
      "learning_rate": 1.6950961265902993e-05,
      "loss": 0.0045,
      "step": 12300
    },
    {
      "epoch": 0.39284017107555835,
      "grad_norm": 0.0014803281519562006,
      "learning_rate": 1.691761292581662e-05,
      "loss": 0.0003,
      "step": 12400
    },
    {
      "epoch": 0.39600823697132903,
      "grad_norm": 0.009712688624858856,
      "learning_rate": 1.6884264585730244e-05,
      "loss": 0.0009,
      "step": 12500
    },
    {
      "epoch": 0.39917630286709965,
      "grad_norm": 0.0013824573252350092,
      "learning_rate": 1.6850916245643873e-05,
      "loss": 0.0003,
      "step": 12600
    },
    {
      "epoch": 0.4023443687628703,
      "grad_norm": 0.23537451028823853,
      "learning_rate": 1.6817567905557502e-05,
      "loss": 0.001,
      "step": 12700
    },
    {
      "epoch": 0.4055124346586409,
      "grad_norm": 0.031864460557699203,
      "learning_rate": 1.6784219565471128e-05,
      "loss": 0.0042,
      "step": 12800
    },
    {
      "epoch": 0.4086805005544115,
      "grad_norm": 0.004931151866912842,
      "learning_rate": 1.6750871225384757e-05,
      "loss": 0.0098,
      "step": 12900
    },
    {
      "epoch": 0.41184856645018214,
      "grad_norm": 0.11204478144645691,
      "learning_rate": 1.6717522885298386e-05,
      "loss": 0.0014,
      "step": 13000
    },
    {
      "epoch": 0.4150166323459528,
      "grad_norm": 0.002025209367275238,
      "learning_rate": 1.6684174545212012e-05,
      "loss": 0.0006,
      "step": 13100
    },
    {
      "epoch": 0.41818469824172344,
      "grad_norm": 0.02806149795651436,
      "learning_rate": 1.665082620512564e-05,
      "loss": 0.0013,
      "step": 13200
    },
    {
      "epoch": 0.42135276413749406,
      "grad_norm": 0.003418268170207739,
      "learning_rate": 1.661747786503927e-05,
      "loss": 0.001,
      "step": 13300
    },
    {
      "epoch": 0.4245208300332647,
      "grad_norm": 0.00011387121776351705,
      "learning_rate": 1.6584129524952896e-05,
      "loss": 0.0012,
      "step": 13400
    },
    {
      "epoch": 0.4276888959290353,
      "grad_norm": 0.008205407299101353,
      "learning_rate": 1.6550781184866525e-05,
      "loss": 0.001,
      "step": 13500
    },
    {
      "epoch": 0.43085696182480593,
      "grad_norm": 0.06980779021978378,
      "learning_rate": 1.651743284478015e-05,
      "loss": 0.0011,
      "step": 13600
    },
    {
      "epoch": 0.4340250277205766,
      "grad_norm": 0.027860773727297783,
      "learning_rate": 1.648408450469378e-05,
      "loss": 0.0015,
      "step": 13700
    },
    {
      "epoch": 0.43719309361634723,
      "grad_norm": 0.0005275747971609235,
      "learning_rate": 1.645073616460741e-05,
      "loss": 0.0004,
      "step": 13800
    },
    {
      "epoch": 0.44036115951211785,
      "grad_norm": 0.0019433089764788747,
      "learning_rate": 1.6417387824521035e-05,
      "loss": 0.0008,
      "step": 13900
    },
    {
      "epoch": 0.44352922540788847,
      "grad_norm": 0.0017472075996920466,
      "learning_rate": 1.6384039484434664e-05,
      "loss": 0.001,
      "step": 14000
    },
    {
      "epoch": 0.4466972913036591,
      "grad_norm": 0.00724576273933053,
      "learning_rate": 1.6350691144348293e-05,
      "loss": 0.0009,
      "step": 14100
    },
    {
      "epoch": 0.44986535719942977,
      "grad_norm": 8.959687693277374e-05,
      "learning_rate": 1.631734280426192e-05,
      "loss": 0.0006,
      "step": 14200
    },
    {
      "epoch": 0.4530334230952004,
      "grad_norm": 0.0013277491088956594,
      "learning_rate": 1.6283994464175548e-05,
      "loss": 0.0012,
      "step": 14300
    },
    {
      "epoch": 0.456201488990971,
      "grad_norm": 0.0497293621301651,
      "learning_rate": 1.6250646124089174e-05,
      "loss": 0.0018,
      "step": 14400
    },
    {
      "epoch": 0.45936955488674164,
      "grad_norm": 0.003778190817683935,
      "learning_rate": 1.6217297784002803e-05,
      "loss": 0.0006,
      "step": 14500
    },
    {
      "epoch": 0.46253762078251226,
      "grad_norm": 0.022186335176229477,
      "learning_rate": 1.6183949443916432e-05,
      "loss": 0.0016,
      "step": 14600
    },
    {
      "epoch": 0.4657056866782829,
      "grad_norm": 8.587163392803632e-06,
      "learning_rate": 1.6150601103830058e-05,
      "loss": 0.0013,
      "step": 14700
    },
    {
      "epoch": 0.46887375257405356,
      "grad_norm": 0.1394946575164795,
      "learning_rate": 1.6117252763743687e-05,
      "loss": 0.0005,
      "step": 14800
    },
    {
      "epoch": 0.4720418184698242,
      "grad_norm": 0.06866718083620071,
      "learning_rate": 1.6083904423657313e-05,
      "loss": 0.0003,
      "step": 14900
    },
    {
      "epoch": 0.4752098843655948,
      "grad_norm": 0.0010026495438069105,
      "learning_rate": 1.6050556083570942e-05,
      "loss": 0.0007,
      "step": 15000
    },
    {
      "epoch": 0.4783779502613654,
      "grad_norm": 0.002642757259309292,
      "learning_rate": 1.6017207743484568e-05,
      "loss": 0.0009,
      "step": 15100
    },
    {
      "epoch": 0.48154601615713605,
      "grad_norm": 0.001733070588670671,
      "learning_rate": 1.5983859403398197e-05,
      "loss": 0.001,
      "step": 15200
    },
    {
      "epoch": 0.4847140820529067,
      "grad_norm": 7.795493729645386e-05,
      "learning_rate": 1.5950511063311826e-05,
      "loss": 0.002,
      "step": 15300
    },
    {
      "epoch": 0.48788214794867735,
      "grad_norm": 0.1361948698759079,
      "learning_rate": 1.591716272322545e-05,
      "loss": 0.0019,
      "step": 15400
    },
    {
      "epoch": 0.49105021384444797,
      "grad_norm": 0.0015777755761519074,
      "learning_rate": 1.588381438313908e-05,
      "loss": 0.0007,
      "step": 15500
    },
    {
      "epoch": 0.4942182797402186,
      "grad_norm": 0.0002617322898004204,
      "learning_rate": 1.5850466043052706e-05,
      "loss": 0.0006,
      "step": 15600
    },
    {
      "epoch": 0.4973863456359892,
      "grad_norm": 0.0005603536847047508,
      "learning_rate": 1.5817117702966336e-05,
      "loss": 0.0021,
      "step": 15700
    },
    {
      "epoch": 0.5005544115317598,
      "grad_norm": 0.007399669848382473,
      "learning_rate": 1.5783769362879965e-05,
      "loss": 0.0017,
      "step": 15800
    },
    {
      "epoch": 0.5037224774275305,
      "grad_norm": 0.0003950314421672374,
      "learning_rate": 1.575042102279359e-05,
      "loss": 0.0024,
      "step": 15900
    },
    {
      "epoch": 0.5068905433233011,
      "grad_norm": 0.00017947182641364634,
      "learning_rate": 1.571707268270722e-05,
      "loss": 0.0014,
      "step": 16000
    },
    {
      "epoch": 0.5100586092190718,
      "grad_norm": 0.010381902568042278,
      "learning_rate": 1.568372434262085e-05,
      "loss": 0.0035,
      "step": 16100
    },
    {
      "epoch": 0.5132266751148424,
      "grad_norm": 0.0018313485197722912,
      "learning_rate": 1.5650376002534474e-05,
      "loss": 0.0008,
      "step": 16200
    },
    {
      "epoch": 0.516394741010613,
      "grad_norm": 8.66261325427331e-05,
      "learning_rate": 1.5617027662448104e-05,
      "loss": 0.0009,
      "step": 16300
    },
    {
      "epoch": 0.5195628069063837,
      "grad_norm": 0.0049484893679618835,
      "learning_rate": 1.5583679322361733e-05,
      "loss": 0.0013,
      "step": 16400
    },
    {
      "epoch": 0.5227308728021542,
      "grad_norm": 0.0376594252884388,
      "learning_rate": 1.555033098227536e-05,
      "loss": 0.0022,
      "step": 16500
    },
    {
      "epoch": 0.5258989386979249,
      "grad_norm": 0.0036107038613408804,
      "learning_rate": 1.5516982642188988e-05,
      "loss": 0.0007,
      "step": 16600
    },
    {
      "epoch": 0.5290670045936956,
      "grad_norm": 0.0010309889912605286,
      "learning_rate": 1.5483634302102613e-05,
      "loss": 0.0002,
      "step": 16700
    },
    {
      "epoch": 0.5322350704894662,
      "grad_norm": 0.0025457690935581923,
      "learning_rate": 1.5450285962016243e-05,
      "loss": 0.0039,
      "step": 16800
    },
    {
      "epoch": 0.5354031363852368,
      "grad_norm": 6.187368126120418e-05,
      "learning_rate": 1.541693762192987e-05,
      "loss": 0.0005,
      "step": 16900
    },
    {
      "epoch": 0.5385712022810074,
      "grad_norm": 0.001785182161256671,
      "learning_rate": 1.5383589281843497e-05,
      "loss": 0.0011,
      "step": 17000
    },
    {
      "epoch": 0.5417392681767781,
      "grad_norm": 8.733890717849135e-05,
      "learning_rate": 1.5350240941757127e-05,
      "loss": 0.0025,
      "step": 17100
    },
    {
      "epoch": 0.5449073340725487,
      "grad_norm": 0.334231436252594,
      "learning_rate": 1.5316892601670752e-05,
      "loss": 0.0006,
      "step": 17200
    },
    {
      "epoch": 0.5480753999683193,
      "grad_norm": 0.0005896109505556524,
      "learning_rate": 1.528354426158438e-05,
      "loss": 0.0011,
      "step": 17300
    },
    {
      "epoch": 0.55124346586409,
      "grad_norm": 0.001895858091302216,
      "learning_rate": 1.5250195921498007e-05,
      "loss": 0.0004,
      "step": 17400
    },
    {
      "epoch": 0.5544115317598606,
      "grad_norm": 0.0020244682673364878,
      "learning_rate": 1.5216847581411636e-05,
      "loss": 0.0007,
      "step": 17500
    },
    {
      "epoch": 0.5575795976556313,
      "grad_norm": 0.005685487762093544,
      "learning_rate": 1.5183499241325264e-05,
      "loss": 0.0004,
      "step": 17600
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 0.0007782336906529963,
      "learning_rate": 1.5150150901238891e-05,
      "loss": 0.0085,
      "step": 17700
    },
    {
      "epoch": 0.5639157294471725,
      "grad_norm": 0.0003303899138700217,
      "learning_rate": 1.5116802561152519e-05,
      "loss": 0.0016,
      "step": 17800
    },
    {
      "epoch": 0.5670837953429432,
      "grad_norm": 0.004449727479368448,
      "learning_rate": 1.5083454221066148e-05,
      "loss": 0.0009,
      "step": 17900
    },
    {
      "epoch": 0.5702518612387137,
      "grad_norm": 0.002211131388321519,
      "learning_rate": 1.5050105880979775e-05,
      "loss": 0.0005,
      "step": 18000
    },
    {
      "epoch": 0.5734199271344844,
      "grad_norm": 0.00011551743227755651,
      "learning_rate": 1.5016757540893403e-05,
      "loss": 0.0009,
      "step": 18100
    },
    {
      "epoch": 0.576587993030255,
      "grad_norm": 2.151384796889033e-05,
      "learning_rate": 1.4983409200807032e-05,
      "loss": 0.0005,
      "step": 18200
    },
    {
      "epoch": 0.5797560589260257,
      "grad_norm": 0.1097760945558548,
      "learning_rate": 1.4950060860720659e-05,
      "loss": 0.0019,
      "step": 18300
    },
    {
      "epoch": 0.5829241248217963,
      "grad_norm": 0.004903141874819994,
      "learning_rate": 1.4916712520634287e-05,
      "loss": 0.0015,
      "step": 18400
    },
    {
      "epoch": 0.5860921907175669,
      "grad_norm": 0.0018493414390832186,
      "learning_rate": 1.4883364180547914e-05,
      "loss": 0.0008,
      "step": 18500
    },
    {
      "epoch": 0.5892602566133376,
      "grad_norm": 0.004849809221923351,
      "learning_rate": 1.4850015840461543e-05,
| "loss": 0.0011, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.5924283225091082, | |
| "grad_norm": 0.5303362607955933, | |
| "learning_rate": 1.481666750037517e-05, | |
| "loss": 0.0007, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.5955963884048788, | |
| "grad_norm": 0.01051903236657381, | |
| "learning_rate": 1.4783319160288798e-05, | |
| "loss": 0.0011, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.5987644543006495, | |
| "grad_norm": 0.001327801146544516, | |
| "learning_rate": 1.4749970820202425e-05, | |
| "loss": 0.0005, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.6019325201964201, | |
| "grad_norm": 0.002282701199874282, | |
| "learning_rate": 1.4716622480116055e-05, | |
| "loss": 0.0023, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.6051005860921908, | |
| "grad_norm": 0.000489629979711026, | |
| "learning_rate": 1.4683274140029682e-05, | |
| "loss": 0.0028, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.6082686519879613, | |
| "grad_norm": 0.09267735481262207, | |
| "learning_rate": 1.464992579994331e-05, | |
| "loss": 0.0008, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.611436717883732, | |
| "grad_norm": 6.615820893784985e-05, | |
| "learning_rate": 1.4616577459856937e-05, | |
| "loss": 0.0007, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.6146047837795026, | |
| "grad_norm": 0.01198121253401041, | |
| "learning_rate": 1.4583229119770566e-05, | |
| "loss": 0.0005, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.6177728496752732, | |
| "grad_norm": 0.0003829908964689821, | |
| "learning_rate": 1.4549880779684193e-05, | |
| "loss": 0.0003, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.6209409155710439, | |
| "grad_norm": 0.04535700008273125, | |
| "learning_rate": 1.451653243959782e-05, | |
| "loss": 0.0009, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.6241089814668145, | |
| "grad_norm": 0.17491640150547028, | |
| "learning_rate": 1.4483184099511447e-05, | |
| "loss": 0.0024, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.6272770473625852, | |
| "grad_norm": 0.0015510759549215436, | |
| "learning_rate": 1.4449835759425074e-05, | |
| "loss": 0.0008, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.6304451132583557, | |
| "grad_norm": 0.039857182651758194, | |
| "learning_rate": 1.4416487419338703e-05, | |
| "loss": 0.0016, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.6336131791541264, | |
| "grad_norm": 0.0024708157870918512, | |
| "learning_rate": 1.438313907925233e-05, | |
| "loss": 0.0007, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.6367812450498971, | |
| "grad_norm": 2.6672966480255127, | |
| "learning_rate": 1.4349790739165958e-05, | |
| "loss": 0.0227, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.6399493109456676, | |
| "grad_norm": 0.001121210865676403, | |
| "learning_rate": 1.4316442399079587e-05, | |
| "loss": 0.0007, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.6431173768414383, | |
| "grad_norm": 0.00026244454784318805, | |
| "learning_rate": 1.4283094058993215e-05, | |
| "loss": 0.0008, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.6462854427372089, | |
| "grad_norm": 0.008422612212598324, | |
| "learning_rate": 1.4249745718906842e-05, | |
| "loss": 0.0017, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.6494535086329796, | |
| "grad_norm": 0.003273066831752658, | |
| "learning_rate": 1.421639737882047e-05, | |
| "loss": 0.0025, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.6526215745287502, | |
| "grad_norm": 0.004011265933513641, | |
| "learning_rate": 1.4183049038734099e-05, | |
| "loss": 0.0005, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.6557896404245208, | |
| "grad_norm": 2.8203889087308198e-05, | |
| "learning_rate": 1.4149700698647726e-05, | |
| "loss": 0.0006, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.6589577063202915, | |
| "grad_norm": 0.028400663286447525, | |
| "learning_rate": 1.4116352358561354e-05, | |
| "loss": 0.0005, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.6621257722160621, | |
| "grad_norm": 0.005530569702386856, | |
| "learning_rate": 1.4083004018474981e-05, | |
| "loss": 0.0007, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.6652938381118327, | |
| "grad_norm": 0.0010687313042581081, | |
| "learning_rate": 1.404965567838861e-05, | |
| "loss": 0.0012, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.6684619040076034, | |
| "grad_norm": 0.02122735045850277, | |
| "learning_rate": 1.4016307338302238e-05, | |
| "loss": 0.0003, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.671629969903374, | |
| "grad_norm": 0.0008722911006771028, | |
| "learning_rate": 1.3982958998215865e-05, | |
| "loss": 0.0021, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.6747980357991447, | |
| "grad_norm": 0.06774666905403137, | |
| "learning_rate": 1.3949610658129492e-05, | |
| "loss": 0.0007, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.000172132917214185, | |
| "learning_rate": 1.3916262318043122e-05, | |
| "loss": 0.0005, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.6811341675906859, | |
| "grad_norm": 0.0020967440214008093, | |
| "learning_rate": 1.3882913977956749e-05, | |
| "loss": 0.0004, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.6843022334864565, | |
| "grad_norm": 0.00023208513448480517, | |
| "learning_rate": 1.3849565637870376e-05, | |
| "loss": 0.0007, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.6874702993822271, | |
| "grad_norm": 0.0010145423002541065, | |
| "learning_rate": 1.3816217297784006e-05, | |
| "loss": 0.0007, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.6906383652779978, | |
| "grad_norm": 0.017040489241480827, | |
| "learning_rate": 1.3782868957697633e-05, | |
| "loss": 0.001, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.6938064311737684, | |
| "grad_norm": 0.0001387975353281945, | |
| "learning_rate": 1.374952061761126e-05, | |
| "loss": 0.001, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.6969744970695391, | |
| "grad_norm": 0.0011847980786114931, | |
| "learning_rate": 1.3716172277524886e-05, | |
| "loss": 0.0015, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.7001425629653096, | |
| "grad_norm": 0.01234695129096508, | |
| "learning_rate": 1.3682823937438514e-05, | |
| "loss": 0.0054, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.7033106288610803, | |
| "grad_norm": 0.07681901007890701, | |
| "learning_rate": 1.3649475597352143e-05, | |
| "loss": 0.0004, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.706478694756851, | |
| "grad_norm": 0.018233176320791245, | |
| "learning_rate": 1.361612725726577e-05, | |
| "loss": 0.0005, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.7096467606526216, | |
| "grad_norm": 0.07004215568304062, | |
| "learning_rate": 1.3582778917179398e-05, | |
| "loss": 0.0007, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.7128148265483922, | |
| "grad_norm": 0.0069395024329423904, | |
| "learning_rate": 1.3549430577093025e-05, | |
| "loss": 0.0006, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.7159828924441628, | |
| "grad_norm": 0.0007382028270512819, | |
| "learning_rate": 1.3516082237006654e-05, | |
| "loss": 0.0053, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.7191509583399335, | |
| "grad_norm": 0.03294491395354271, | |
| "learning_rate": 1.3482733896920282e-05, | |
| "loss": 0.0005, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.7223190242357042, | |
| "grad_norm": 0.22843457758426666, | |
| "learning_rate": 1.3449385556833909e-05, | |
| "loss": 0.001, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.7254870901314747, | |
| "grad_norm": 0.0002898550301324576, | |
| "learning_rate": 1.3416037216747537e-05, | |
| "loss": 0.0004, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.7286551560272454, | |
| "grad_norm": 0.0034780879504978657, | |
| "learning_rate": 1.3382688876661166e-05, | |
| "loss": 0.0016, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.731823221923016, | |
| "grad_norm": 0.04466221109032631, | |
| "learning_rate": 1.3349340536574793e-05, | |
| "loss": 0.0043, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.7349912878187866, | |
| "grad_norm": 9.875125215330627e-06, | |
| "learning_rate": 1.331599219648842e-05, | |
| "loss": 0.0019, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.7381593537145572, | |
| "grad_norm": 0.5125452876091003, | |
| "learning_rate": 1.3282643856402048e-05, | |
| "loss": 0.0011, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.7413274196103279, | |
| "grad_norm": 0.02045290358364582, | |
| "learning_rate": 1.3249295516315677e-05, | |
| "loss": 0.0004, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.7444954855060986, | |
| "grad_norm": 0.04965211823582649, | |
| "learning_rate": 1.3215947176229305e-05, | |
| "loss": 0.0008, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.7476635514018691, | |
| "grad_norm": 0.09520922601222992, | |
| "learning_rate": 1.3182598836142932e-05, | |
| "loss": 0.0023, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.7508316172976398, | |
| "grad_norm": 4.333815923018847e-06, | |
| "learning_rate": 1.3149250496056561e-05, | |
| "loss": 0.0005, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.7539996831934104, | |
| "grad_norm": 0.21383151412010193, | |
| "learning_rate": 1.3115902155970189e-05, | |
| "loss": 0.0006, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.757167749089181, | |
| "grad_norm": 0.0006424040184356272, | |
| "learning_rate": 1.3082553815883816e-05, | |
| "loss": 0.0004, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.7603358149849517, | |
| "grad_norm": 0.0008101545972749591, | |
| "learning_rate": 1.3049205475797443e-05, | |
| "loss": 0.0003, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.7635038808807223, | |
| "grad_norm": 0.0026161724235862494, | |
| "learning_rate": 1.3015857135711073e-05, | |
| "loss": 0.0006, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.766671946776493, | |
| "grad_norm": 9.565529762767255e-05, | |
| "learning_rate": 1.29825087956247e-05, | |
| "loss": 0.0009, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.7698400126722635, | |
| "grad_norm": 0.026133345440030098, | |
| "learning_rate": 1.2949160455538327e-05, | |
| "loss": 0.0055, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.7730080785680342, | |
| "grad_norm": 0.0001082066519302316, | |
| "learning_rate": 1.2915812115451953e-05, | |
| "loss": 0.0007, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.7761761444638049, | |
| "grad_norm": 0.0007577472133561969, | |
| "learning_rate": 1.288246377536558e-05, | |
| "loss": 0.001, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.7793442103595755, | |
| "grad_norm": 0.0003420517314225435, | |
| "learning_rate": 1.284911543527921e-05, | |
| "loss": 0.0014, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.7825122762553461, | |
| "grad_norm": 0.006726464722305536, | |
| "learning_rate": 1.2815767095192837e-05, | |
| "loss": 0.0008, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.7856803421511167, | |
| "grad_norm": 0.0014214670518413186, | |
| "learning_rate": 1.2782418755106465e-05, | |
| "loss": 0.0011, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.7888484080468874, | |
| "grad_norm": 0.006449591834098101, | |
| "learning_rate": 1.2749070415020092e-05, | |
| "loss": 0.0026, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.7920164739426581, | |
| "grad_norm": 0.0005304469959810376, | |
| "learning_rate": 1.2715722074933721e-05, | |
| "loss": 0.0026, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.7951845398384286, | |
| "grad_norm": 0.0006021055160090327, | |
| "learning_rate": 1.2682373734847349e-05, | |
| "loss": 0.0008, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.7983526057341993, | |
| "grad_norm": 0.0025042754132300615, | |
| "learning_rate": 1.2649025394760976e-05, | |
| "loss": 0.001, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.8015206716299699, | |
| "grad_norm": 0.013771702535450459, | |
| "learning_rate": 1.2615677054674605e-05, | |
| "loss": 0.0014, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.8046887375257405, | |
| "grad_norm": 0.007065094541758299, | |
| "learning_rate": 1.2582328714588233e-05, | |
| "loss": 0.0005, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.8078568034215111, | |
| "grad_norm": 0.0009276416385546327, | |
| "learning_rate": 1.254898037450186e-05, | |
| "loss": 0.0018, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.8110248693172818, | |
| "grad_norm": 0.026806719601154327, | |
| "learning_rate": 1.2515632034415487e-05, | |
| "loss": 0.0011, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.8141929352130525, | |
| "grad_norm": 0.00029596476815640926, | |
| "learning_rate": 1.2482283694329117e-05, | |
| "loss": 0.0036, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.817361001108823, | |
| "grad_norm": 0.0062362137250602245, | |
| "learning_rate": 1.2448935354242744e-05, | |
| "loss": 0.0009, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.8205290670045937, | |
| "grad_norm": 0.005555544979870319, | |
| "learning_rate": 1.2415587014156371e-05, | |
| "loss": 0.0019, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.8236971329003643, | |
| "grad_norm": 0.005463754292577505, | |
| "learning_rate": 1.2382238674069999e-05, | |
| "loss": 0.0014, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.826865198796135, | |
| "grad_norm": 0.0022091898135840893, | |
| "learning_rate": 1.2348890333983628e-05, | |
| "loss": 0.0005, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.8300332646919056, | |
| "grad_norm": 0.12676991522312164, | |
| "learning_rate": 1.2315541993897255e-05, | |
| "loss": 0.0014, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.8332013305876762, | |
| "grad_norm": 0.0015652469592168927, | |
| "learning_rate": 1.2282193653810883e-05, | |
| "loss": 0.0026, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.8363693964834469, | |
| "grad_norm": 0.00408145971596241, | |
| "learning_rate": 1.224884531372451e-05, | |
| "loss": 0.0011, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.8395374623792174, | |
| "grad_norm": 0.00045841402607038617, | |
| "learning_rate": 1.221549697363814e-05, | |
| "loss": 0.0216, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.8427055282749881, | |
| "grad_norm": 0.02451806142926216, | |
| "learning_rate": 1.2182148633551767e-05, | |
| "loss": 0.002, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.8458735941707588, | |
| "grad_norm": 0.00027516239788383245, | |
| "learning_rate": 1.2148800293465394e-05, | |
| "loss": 0.001, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.8490416600665294, | |
| "grad_norm": 1.4936133084120229e-05, | |
| "learning_rate": 1.211545195337902e-05, | |
| "loss": 0.0048, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.8522097259623, | |
| "grad_norm": 0.00033001156407408416, | |
| "learning_rate": 1.2082103613292648e-05, | |
| "loss": 0.0083, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.8553777918580706, | |
| "grad_norm": 0.017794223502278328, | |
| "learning_rate": 1.2048755273206277e-05, | |
| "loss": 0.0011, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.8585458577538413, | |
| "grad_norm": 0.0008482965640723705, | |
| "learning_rate": 1.2015406933119904e-05, | |
| "loss": 0.0054, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.8617139236496119, | |
| "grad_norm": 0.01870151422917843, | |
| "learning_rate": 1.1982058593033532e-05, | |
| "loss": 0.015, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.8648819895453825, | |
| "grad_norm": 10.855655670166016, | |
| "learning_rate": 1.194871025294716e-05, | |
| "loss": 0.003, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.8680500554411532, | |
| "grad_norm": 0.029217010363936424, | |
| "learning_rate": 1.1915361912860788e-05, | |
| "loss": 0.0024, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.8712181213369238, | |
| "grad_norm": 0.03195062279701233, | |
| "learning_rate": 1.1882013572774416e-05, | |
| "loss": 0.0014, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.8743861872326945, | |
| "grad_norm": 0.005607489962130785, | |
| "learning_rate": 1.1848665232688043e-05, | |
| "loss": 0.0029, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.877554253128465, | |
| "grad_norm": 0.0009394127409905195, | |
| "learning_rate": 1.1815316892601672e-05, | |
| "loss": 0.0041, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.8807223190242357, | |
| "grad_norm": 0.004952901508659124, | |
| "learning_rate": 1.17819685525153e-05, | |
| "loss": 0.0007, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.8838903849200064, | |
| "grad_norm": 0.0013988588470965624, | |
| "learning_rate": 1.1748620212428927e-05, | |
| "loss": 0.0019, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.8870584508157769, | |
| "grad_norm": 0.0029521535616368055, | |
| "learning_rate": 1.1715271872342554e-05, | |
| "loss": 0.001, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.8902265167115476, | |
| "grad_norm": 0.0014934735372662544, | |
| "learning_rate": 1.1681923532256184e-05, | |
| "loss": 0.0006, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.8933945826073182, | |
| "grad_norm": 0.12357547879219055, | |
| "learning_rate": 1.1648575192169811e-05, | |
| "loss": 0.0008, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.8965626485030889, | |
| "grad_norm": 0.0003371001803316176, | |
| "learning_rate": 1.1615226852083438e-05, | |
| "loss": 0.001, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.8997307143988595, | |
| "grad_norm": 0.01792779751121998, | |
| "learning_rate": 1.1581878511997066e-05, | |
| "loss": 0.0014, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.9028987802946301, | |
| "grad_norm": 0.031045174226164818, | |
| "learning_rate": 1.1548530171910695e-05, | |
| "loss": 0.0019, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.9060668461904008, | |
| "grad_norm": 0.031731363385915756, | |
| "learning_rate": 1.1515181831824322e-05, | |
| "loss": 0.0005, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.9092349120861714, | |
| "grad_norm": 0.003199261147528887, | |
| "learning_rate": 1.148183349173795e-05, | |
| "loss": 0.0021, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.912402977981942, | |
| "grad_norm": 4.737121344078332e-05, | |
| "learning_rate": 1.1448485151651579e-05, | |
| "loss": 0.0006, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.9155710438777127, | |
| "grad_norm": 0.012714708223938942, | |
| "learning_rate": 1.1415136811565206e-05, | |
| "loss": 0.0013, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.9187391097734833, | |
| "grad_norm": 0.00023968149616848677, | |
| "learning_rate": 1.1381788471478834e-05, | |
| "loss": 0.0013, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.921907175669254, | |
| "grad_norm": 0.00018393975915387273, | |
| "learning_rate": 1.1348440131392461e-05, | |
| "loss": 0.003, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.9250752415650245, | |
| "grad_norm": 31.17757225036621, | |
| "learning_rate": 1.1315091791306087e-05, | |
| "loss": 0.0073, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.9282433074607952, | |
| "grad_norm": 0.7764952182769775, | |
| "learning_rate": 1.1281743451219716e-05, | |
| "loss": 0.0006, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.9314113733565658, | |
| "grad_norm": 0.02078479342162609, | |
| "learning_rate": 1.1248395111133344e-05, | |
| "loss": 0.0055, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.9345794392523364, | |
| "grad_norm": 0.024362290278077126, | |
| "learning_rate": 1.1215046771046971e-05, | |
| "loss": 0.0015, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.9377475051481071, | |
| "grad_norm": 0.006054306868463755, | |
| "learning_rate": 1.1181698430960599e-05, | |
| "loss": 0.0006, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.9409155710438777, | |
| "grad_norm": 0.0014811428263783455, | |
| "learning_rate": 1.1148350090874228e-05, | |
| "loss": 0.0016, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.9440836369396484, | |
| "grad_norm": 0.00024473376106470823, | |
| "learning_rate": 1.1115001750787855e-05, | |
| "loss": 0.0048, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.9472517028354189, | |
| "grad_norm": 0.9019193053245544, | |
| "learning_rate": 1.1081653410701483e-05, | |
| "loss": 0.0012, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.9504197687311896, | |
| "grad_norm": 0.005399579647928476, | |
| "learning_rate": 1.104830507061511e-05, | |
| "loss": 0.0017, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.9535878346269603, | |
| "grad_norm": 9.696155757410452e-05, | |
| "learning_rate": 1.1014956730528739e-05, | |
| "loss": 0.001, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.9567559005227309, | |
| "grad_norm": 0.004988879431039095, | |
| "learning_rate": 1.0981608390442367e-05, | |
| "loss": 0.0009, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.9599239664185015, | |
| "grad_norm": 0.0014560155104845762, | |
| "learning_rate": 1.0948260050355994e-05, | |
| "loss": 0.0046, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.9630920323142721, | |
| "grad_norm": 0.0015756795182824135, | |
| "learning_rate": 1.0914911710269623e-05, | |
| "loss": 0.0019, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.9662600982100428, | |
| "grad_norm": 0.0033032239880412817, | |
| "learning_rate": 1.088156337018325e-05, | |
| "loss": 0.0034, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.9694281641058135, | |
| "grad_norm": 0.001508047222159803, | |
| "learning_rate": 1.0848215030096878e-05, | |
| "loss": 0.0015, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.972596230001584, | |
| "grad_norm": 0.005174440797418356, | |
| "learning_rate": 1.0814866690010505e-05, | |
| "loss": 0.0007, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.9757642958973547, | |
| "grad_norm": 0.08969740569591522, | |
| "learning_rate": 1.0781518349924135e-05, | |
| "loss": 0.0031, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.9789323617931253, | |
| "grad_norm": 1.643660471017938e-05, | |
| "learning_rate": 1.0748170009837762e-05, | |
| "loss": 0.0039, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.9821004276888959, | |
| "grad_norm": 0.0014628027565777302, | |
| "learning_rate": 1.071482166975139e-05, | |
| "loss": 0.0011, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.9852684935846666, | |
| "grad_norm": 0.00014162520528770983, | |
| "learning_rate": 1.0681473329665017e-05, | |
| "loss": 0.0014, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.9884365594804372, | |
| "grad_norm": 0.0008995328098535538, | |
| "learning_rate": 1.0648124989578646e-05, | |
| "loss": 0.0015, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.9916046253762079, | |
| "grad_norm": 0.000977379735559225, | |
| "learning_rate": 1.0614776649492273e-05, | |
| "loss": 0.0018, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.9947726912719784, | |
| "grad_norm": 0.0006964199710637331, | |
| "learning_rate": 1.0581428309405901e-05, | |
| "loss": 0.0349, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.9979407571677491, | |
| "grad_norm": 0.001510333619080484, | |
| "learning_rate": 1.0548079969319528e-05, | |
| "loss": 0.0007, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.0011088230635197, | |
| "grad_norm": 0.0027738306671380997, | |
| "learning_rate": 1.0514731629233154e-05, | |
| "loss": 0.0014, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.0042768889592903, | |
| "grad_norm": 0.0002610177034512162, | |
| "learning_rate": 1.0481383289146783e-05, | |
| "loss": 0.0024, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.007444954855061, | |
| "grad_norm": 0.0035766460932791233, | |
| "learning_rate": 1.044803494906041e-05, | |
| "loss": 0.0013, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.0106130207508317, | |
| "grad_norm": 0.001029430190101266, | |
| "learning_rate": 1.0414686608974038e-05, | |
| "loss": 0.0084, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.0137810866466022, | |
| "grad_norm": 0.0004998042713850737, | |
| "learning_rate": 1.0381338268887665e-05, | |
| "loss": 0.0016, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.0169491525423728, | |
| "grad_norm": 0.03187868371605873, | |
| "learning_rate": 1.0347989928801295e-05, | |
| "loss": 0.0007, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.0201172184381435, | |
| "grad_norm": 0.0006562778726220131, | |
| "learning_rate": 1.0314641588714922e-05, | |
| "loss": 0.0018, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.0232852843339142, | |
| "grad_norm": 0.0015058261342346668, | |
| "learning_rate": 1.028129324862855e-05, | |
| "loss": 0.0017, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.0264533502296849, | |
| "grad_norm": 0.00016952259466052055, | |
| "learning_rate": 1.0247944908542179e-05, | |
| "loss": 0.0017, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.0296214161254553, | |
| "grad_norm": 7.879018994572107e-06, | |
| "learning_rate": 1.0214596568455806e-05, | |
| "loss": 0.0008, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.032789482021226, | |
| "grad_norm": 0.006021997891366482, | |
| "learning_rate": 1.0181248228369433e-05, | |
| "loss": 0.0012, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.0359575479169967, | |
| "grad_norm": 0.2344316691160202, | |
| "learning_rate": 1.0147899888283061e-05, | |
| "loss": 0.0028, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.0391256138127674, | |
| "grad_norm": 0.002558755222707987, | |
| "learning_rate": 1.011455154819669e-05, | |
| "loss": 0.0004, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.042293679708538, | |
| "grad_norm": 0.31538712978363037, | |
| "learning_rate": 1.0081203208110317e-05, | |
| "loss": 0.0007, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.0454617456043085, | |
| "grad_norm": 0.00013157808280084282, | |
| "learning_rate": 1.0047854868023945e-05, | |
| "loss": 0.0002, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.0486298115000792, | |
| "grad_norm": 0.0020028562285006046, | |
| "learning_rate": 1.0014506527937572e-05, | |
| "loss": 0.0013, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.0517978773958498, | |
| "grad_norm": 0.266053169965744, | |
| "learning_rate": 9.981158187851201e-06, | |
| "loss": 0.0013, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.0549659432916205, | |
| "grad_norm": 0.0006200580974109471, | |
| "learning_rate": 9.947809847764829e-06, | |
| "loss": 0.0009, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.0581340091873912, | |
| "grad_norm": 0.0010163384722545743, | |
| "learning_rate": 9.914461507678456e-06, | |
| "loss": 0.0004, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.0613020750831617, | |
| "grad_norm": 0.0007398608140647411, | |
| "learning_rate": 9.881113167592084e-06, | |
| "loss": 0.0006, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.0644701409789323, | |
| "grad_norm": 5.2672654419438913e-05, | |
| "learning_rate": 9.847764827505711e-06, | |
| "loss": 0.0003, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.067638206874703, | |
| "grad_norm": 0.0007165202987380326, | |
| "learning_rate": 9.814416487419339e-06, | |
| "loss": 0.0014, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.0708062727704737, | |
| "grad_norm": 0.012463639490306377, | |
| "learning_rate": 9.781068147332968e-06, | |
| "loss": 0.0006, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.0739743386662441, | |
| "grad_norm": 0.0006154962466098368, | |
| "learning_rate": 9.747719807246595e-06, | |
| "loss": 0.0002, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.0771424045620148, | |
| "grad_norm": 0.018396640196442604, | |
| "learning_rate": 9.714371467160223e-06, | |
| "loss": 0.0004, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.0803104704577855, | |
| "grad_norm": 0.0008099168189801276, | |
| "learning_rate": 9.68102312707385e-06, | |
| "loss": 0.0008, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.0834785363535562, | |
| "grad_norm": 0.01304282434284687, | |
| "learning_rate": 9.64767478698748e-06, | |
| "loss": 0.0004, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.0866466022493269, | |
| "grad_norm": 1.1287897825241089, | |
| "learning_rate": 9.614326446901107e-06, | |
| "loss": 0.0005, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.0898146681450975, | |
| "grad_norm": 0.0009092055261135101, | |
| "learning_rate": 9.580978106814734e-06, | |
| "loss": 0.0003, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.092982734040868, | |
| "grad_norm": 1.1557484867807943e-05, | |
| "learning_rate": 9.547629766728362e-06, | |
| "loss": 0.0001, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.0961507999366387, | |
| "grad_norm": 0.0008222841424867511, | |
| "learning_rate": 9.514281426641989e-06, | |
| "loss": 0.0012, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.0993188658324093, | |
| "grad_norm": 0.0022294274531304836, | |
| "learning_rate": 9.480933086555616e-06, | |
| "loss": 0.0058, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.10248693172818, | |
| "grad_norm": 0.009762086905539036, | |
| "learning_rate": 9.447584746469246e-06, | |
| "loss": 0.0006, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.1056549976239505, | |
| "grad_norm": 0.7700905799865723, | |
| "learning_rate": 9.414236406382873e-06, | |
| "loss": 0.002, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.1088230635197212, | |
| "grad_norm": 0.0003761460247915238, | |
| "learning_rate": 9.3808880662965e-06, | |
| "loss": 0.0003, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.1119911294154918, | |
| "grad_norm": 0.031011968851089478, | |
| "learning_rate": 9.347539726210128e-06, | |
| "loss": 0.0002, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.1151591953112625, | |
| "grad_norm": 5.691965634468943e-05, | |
| "learning_rate": 9.314191386123757e-06, | |
| "loss": 0.0012, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.1183272612070332, | |
| "grad_norm": 0.002426894148811698, | |
| "learning_rate": 9.280843046037384e-06, | |
| "loss": 0.0003, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.1214953271028036, | |
| "grad_norm": 8.327289106091484e-05, | |
| "learning_rate": 9.247494705951012e-06, | |
| "loss": 0.0008, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.1246633929985743, | |
| "grad_norm": 0.0014542491408064961, | |
| "learning_rate": 9.21414636586464e-06, | |
| "loss": 0.0003, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.127831458894345, | |
| "grad_norm": 0.00029037665808573365, | |
| "learning_rate": 9.180798025778268e-06, | |
| "loss": 0.0006, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.1309995247901157, | |
| "grad_norm": 0.00022046563390176743, | |
| "learning_rate": 9.147449685691896e-06, | |
| "loss": 0.0003, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.1341675906858864, | |
| "grad_norm": 0.0006549390382133424, | |
| "learning_rate": 9.114101345605523e-06, | |
| "loss": 0.0002, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.1373356565816568, | |
| "grad_norm": 7.389521488221362e-05, | |
| "learning_rate": 9.08075300551915e-06, | |
| "loss": 0.0004, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.1405037224774275, | |
| "grad_norm": 0.03006296418607235, | |
| "learning_rate": 9.047404665432778e-06, | |
| "loss": 0.0004, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.1436717883731982, | |
| "grad_norm": 0.0006366446614265442, | |
| "learning_rate": 9.014056325346406e-06, | |
| "loss": 0.0003, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.1468398542689688, | |
| "grad_norm": 1.6421807231381536e-05, | |
| "learning_rate": 8.980707985260035e-06, | |
| "loss": 0.0011, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.1500079201647395, | |
| "grad_norm": 0.0005404766998253763, | |
| "learning_rate": 8.947359645173662e-06, | |
| "loss": 0.0003, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.15317598606051, | |
| "grad_norm": 4.2021845729323104e-05, | |
| "learning_rate": 8.91401130508729e-06, | |
| "loss": 0.0005, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.1563440519562806, | |
| "grad_norm": 0.224160835146904, | |
| "learning_rate": 8.880662965000919e-06, | |
| "loss": 0.0006, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.1595121178520513, | |
| "grad_norm": 0.00010083234519697726, | |
| "learning_rate": 8.847314624914546e-06, | |
| "loss": 0.0004, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.162680183747822, | |
| "grad_norm": 0.00012005392636638135, | |
| "learning_rate": 8.813966284828174e-06, | |
| "loss": 0.0002, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.1658482496435927, | |
| "grad_norm": 0.00014081050176173449, | |
| "learning_rate": 8.780617944741801e-06, | |
| "loss": 0.0002, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.1690163155393631, | |
| "grad_norm": 0.0011462682159617543, | |
| "learning_rate": 8.74726960465543e-06, | |
| "loss": 0.0003, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.1721843814351338, | |
| "grad_norm": 0.0005480795516632497, | |
| "learning_rate": 8.713921264569056e-06, | |
| "loss": 0.0008, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.1753524473309045, | |
| "grad_norm": 0.00025665463181212544, | |
| "learning_rate": 8.680572924482683e-06, | |
| "loss": 0.0003, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.1785205132266752, | |
| "grad_norm": 0.0019408657681196928, | |
| "learning_rate": 8.647224584396313e-06, | |
| "loss": 0.0004, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.1816885791224458, | |
| "grad_norm": 8.986064131022431e-06, | |
| "learning_rate": 8.61387624430994e-06, | |
| "loss": 0.0003, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.1848566450182163, | |
| "grad_norm": 0.008489036932587624, | |
| "learning_rate": 8.580527904223567e-06, | |
| "loss": 0.0004, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.188024710913987, | |
| "grad_norm": 0.0009591535781510174, | |
| "learning_rate": 8.547179564137197e-06, | |
| "loss": 0.0004, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.1911927768097577, | |
| "grad_norm": 0.009112311527132988, | |
| "learning_rate": 8.513831224050824e-06, | |
| "loss": 0.0004, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.1943608427055283, | |
| "grad_norm": 0.9046971797943115, | |
| "learning_rate": 8.480482883964451e-06, | |
| "loss": 0.0004, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.1975289086012988, | |
| "grad_norm": 0.00033651836565695703, | |
| "learning_rate": 8.447134543878079e-06, | |
| "loss": 0.0009, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.2006969744970695, | |
| "grad_norm": 0.03974246233701706, | |
| "learning_rate": 8.413786203791708e-06, | |
| "loss": 0.0005, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.2038650403928401, | |
| "grad_norm": 5.420077286544256e-05, | |
| "learning_rate": 8.380437863705335e-06, | |
| "loss": 0.0001, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.2070331062886108, | |
| "grad_norm": 4.04864113079384e-06, | |
| "learning_rate": 8.347089523618963e-06, | |
| "loss": 0.0001, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.2102011721843815, | |
| "grad_norm": 0.011167597025632858, | |
| "learning_rate": 8.31374118353259e-06, | |
| "loss": 0.0004, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.2133692380801522, | |
| "grad_norm": 0.0032947207801043987, | |
| "learning_rate": 8.280392843446218e-06, | |
| "loss": 0.0003, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.2165373039759226, | |
| "grad_norm": 0.003191061317920685, | |
| "learning_rate": 8.247044503359845e-06, | |
| "loss": 0.0004, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.2197053698716933, | |
| "grad_norm": 0.0017466794233769178, | |
| "learning_rate": 8.213696163273474e-06, | |
| "loss": 0.0003, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.222873435767464, | |
| "grad_norm": 0.000900625775102526, | |
| "learning_rate": 8.180347823187102e-06, | |
| "loss": 0.0025, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.2260415016632347, | |
| "grad_norm": 0.015701597556471825, | |
| "learning_rate": 8.14699948310073e-06, | |
| "loss": 0.0013, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.2292095675590051, | |
| "grad_norm": 8.967128087533638e-05, | |
| "learning_rate": 8.113651143014357e-06, | |
| "loss": 0.0002, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.2323776334547758, | |
| "grad_norm": 0.23212437331676483, | |
| "learning_rate": 8.080302802927986e-06, | |
| "loss": 0.0002, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.2355456993505465, | |
| "grad_norm": 0.001443861285224557, | |
| "learning_rate": 8.046954462841613e-06, | |
| "loss": 0.0006, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.2387137652463172, | |
| "grad_norm": 0.32768702507019043, | |
| "learning_rate": 8.01360612275524e-06, | |
| "loss": 0.0002, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.2418818311420878, | |
| "grad_norm": 0.13200968503952026, | |
| "learning_rate": 7.980257782668868e-06, | |
| "loss": 0.0003, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.2450498970378585, | |
| "grad_norm": 0.041544314473867416, | |
| "learning_rate": 7.946909442582497e-06, | |
| "loss": 0.0003, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.248217962933629, | |
| "grad_norm": 0.003154648235067725, | |
| "learning_rate": 7.913561102496123e-06, | |
| "loss": 0.0003, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.2513860288293996, | |
| "grad_norm": 7.159214146668091e-05, | |
| "learning_rate": 7.880212762409752e-06, | |
| "loss": 0.0003, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.2545540947251703, | |
| "grad_norm": 0.005242425017058849, | |
| "learning_rate": 7.84686442232338e-06, | |
| "loss": 0.0005, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.257722160620941, | |
| "grad_norm": 0.12032686918973923, | |
| "learning_rate": 7.813516082237007e-06, | |
| "loss": 0.0002, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.2608902265167115, | |
| "grad_norm": 3.002350786118768e-05, | |
| "learning_rate": 7.780167742150634e-06, | |
| "loss": 0.0003, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.2640582924124821, | |
| "grad_norm": 1.667384094616864e-05, | |
| "learning_rate": 7.746819402064264e-06, | |
| "loss": 0.0003, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.2672263583082528, | |
| "grad_norm": 0.004214556887745857, | |
| "learning_rate": 7.713471061977891e-06, | |
| "loss": 0.0002, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.2703944242040235, | |
| "grad_norm": 0.00022098551562521607, | |
| "learning_rate": 7.680122721891518e-06, | |
| "loss": 0.0002, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.2735624900997942, | |
| "grad_norm": 0.0006105100037530065, | |
| "learning_rate": 7.646774381805146e-06, | |
| "loss": 0.0003, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.2767305559955648, | |
| "grad_norm": 0.0006831226055510342, | |
| "learning_rate": 7.613426041718774e-06, | |
| "loss": 0.0001, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.2798986218913353, | |
| "grad_norm": 0.00035824175574816763, | |
| "learning_rate": 7.580077701632402e-06, | |
| "loss": 0.001, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.283066687787106, | |
| "grad_norm": 0.0009726459975354373, | |
| "learning_rate": 7.54672936154603e-06, | |
| "loss": 0.0002, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.2862347536828767, | |
| "grad_norm": 3.4098738979082555e-05, | |
| "learning_rate": 7.513381021459657e-06, | |
| "loss": 0.0003, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.289402819578647, | |
| "grad_norm": 0.0016234411159530282, | |
| "learning_rate": 7.480032681373285e-06, | |
| "loss": 0.0004, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.2925708854744178, | |
| "grad_norm": 0.019855381920933723, | |
| "learning_rate": 7.446684341286913e-06, | |
| "loss": 0.0004, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.2957389513701885, | |
| "grad_norm": 0.010516811162233353, | |
| "learning_rate": 7.41333600120054e-06, | |
| "loss": 0.0002, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.2989070172659591, | |
| "grad_norm": 0.1184462159872055, | |
| "learning_rate": 7.379987661114169e-06, | |
| "loss": 0.0006, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.3020750831617298, | |
| "grad_norm": 3.537457814672962e-05, | |
| "learning_rate": 7.346639321027796e-06, | |
| "loss": 0.0002, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.3052431490575005, | |
| "grad_norm": 0.0002039948885794729, | |
| "learning_rate": 7.313290980941424e-06, | |
| "loss": 0.0002, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.308411214953271, | |
| "grad_norm": 0.06637877225875854, | |
| "learning_rate": 7.279942640855052e-06, | |
| "loss": 0.0003, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.3115792808490416, | |
| "grad_norm": 0.06105900555849075, | |
| "learning_rate": 7.24659430076868e-06, | |
| "loss": 0.0002, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.3147473467448123, | |
| "grad_norm": 0.0005946651799604297, | |
| "learning_rate": 7.213245960682308e-06, | |
| "loss": 0.0012, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.317915412640583, | |
| "grad_norm": 0.00044926407281309366, | |
| "learning_rate": 7.179897620595936e-06, | |
| "loss": 0.0005, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.3210834785363534, | |
| "grad_norm": 0.0034730539191514254, | |
| "learning_rate": 7.146549280509564e-06, | |
| "loss": 0.0022, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.3242515444321241, | |
| "grad_norm": 0.00045728174154646695, | |
| "learning_rate": 7.113200940423191e-06, | |
| "loss": 0.0002, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.3274196103278948, | |
| "grad_norm": 0.029374372214078903, | |
| "learning_rate": 7.079852600336818e-06, | |
| "loss": 0.0007, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.3305876762236655, | |
| "grad_norm": 0.010232986882328987, | |
| "learning_rate": 7.0465042602504465e-06, | |
| "loss": 0.0003, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.3337557421194361, | |
| "grad_norm": 0.00016134929319377989, | |
| "learning_rate": 7.013155920164074e-06, | |
| "loss": 0.0002, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.3369238080152068, | |
| "grad_norm": 0.0003897528804372996, | |
| "learning_rate": 6.979807580077702e-06, | |
| "loss": 0.0002, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.3400918739109773, | |
| "grad_norm": 0.0017471498576924205, | |
| "learning_rate": 6.94645923999133e-06, | |
| "loss": 0.0002, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.343259939806748, | |
| "grad_norm": 0.017642421647906303, | |
| "learning_rate": 6.913110899904958e-06, | |
| "loss": 0.0001, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.3464280057025186, | |
| "grad_norm": 0.0010317267151549459, | |
| "learning_rate": 6.879762559818585e-06, | |
| "loss": 0.0002, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.3495960715982893, | |
| "grad_norm": 0.029274389147758484, | |
| "learning_rate": 6.846414219732214e-06, | |
| "loss": 0.0005, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.3527641374940598, | |
| "grad_norm": 0.0007354663102887571, | |
| "learning_rate": 6.813065879645842e-06, | |
| "loss": 0.0001, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 0.00034641989623196423, | |
| "learning_rate": 6.779717539559469e-06, | |
| "loss": 0.0002, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.3591002692856011, | |
| "grad_norm": 0.149564728140831, | |
| "learning_rate": 6.746369199473098e-06, | |
| "loss": 0.0003, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.3622683351813718, | |
| "grad_norm": 0.011665324680507183, | |
| "learning_rate": 6.713020859386724e-06, | |
| "loss": 0.0004, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.3654364010771425, | |
| "grad_norm": 0.00013306832988746464, | |
| "learning_rate": 6.679672519300352e-06, | |
| "loss": 0.0002, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.3686044669729132, | |
| "grad_norm": 0.00031623971881344914, | |
| "learning_rate": 6.64632417921398e-06, | |
| "loss": 0.0004, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.3717725328686836, | |
| "grad_norm": 0.003858871292322874, | |
| "learning_rate": 6.612975839127607e-06, | |
| "loss": 0.0008, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.3749405987644543, | |
| "grad_norm": 0.00579412467777729, | |
| "learning_rate": 6.579627499041236e-06, | |
| "loss": 0.0002, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.378108664660225, | |
| "grad_norm": 3.0335993869812228e-05, | |
| "learning_rate": 6.546279158954864e-06, | |
| "loss": 0.0004, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.3812767305559956, | |
| "grad_norm": 0.0006304982816800475, | |
| "learning_rate": 6.512930818868491e-06, | |
| "loss": 0.0001, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.384444796451766, | |
| "grad_norm": 0.00015773455379530787, | |
| "learning_rate": 6.47958247878212e-06, | |
| "loss": 0.0001, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.3876128623475368, | |
| "grad_norm": 0.005809741094708443, | |
| "learning_rate": 6.446234138695747e-06, | |
| "loss": 0.009, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.3907809282433075, | |
| "grad_norm": 0.0010448688408359885, | |
| "learning_rate": 6.412885798609375e-06, | |
| "loss": 0.0002, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.3939489941390781, | |
| "grad_norm": 0.0030797335784882307, | |
| "learning_rate": 6.379537458523003e-06, | |
| "loss": 0.0002, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.3971170600348488, | |
| "grad_norm": 0.00012675885227508843, | |
| "learning_rate": 6.346189118436631e-06, | |
| "loss": 0.0003, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.4002851259306195, | |
| "grad_norm": 0.0043240697123110294, | |
| "learning_rate": 6.312840778350258e-06, | |
| "loss": 0.0003, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.40345319182639, | |
| "grad_norm": 0.00020247649808879942, | |
| "learning_rate": 6.279492438263885e-06, | |
| "loss": 0.0002, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.4066212577221606, | |
| "grad_norm": 0.0025763397570699453, | |
| "learning_rate": 6.2461440981775134e-06, | |
| "loss": 0.0004, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.4097893236179313, | |
| "grad_norm": 0.00025575104518793523, | |
| "learning_rate": 6.212795758091142e-06, | |
| "loss": 0.0006, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.4129573895137018, | |
| "grad_norm": 0.0006715962663292885, | |
| "learning_rate": 6.179447418004769e-06, | |
| "loss": 0.0002, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.4161254554094724, | |
| "grad_norm": 0.0005802169325761497, | |
| "learning_rate": 6.1460990779183974e-06, | |
| "loss": 0.0004, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.419293521305243, | |
| "grad_norm": 6.348552687995834e-06, | |
| "learning_rate": 6.112750737832025e-06, | |
| "loss": 0.0002, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.4224615872010138, | |
| "grad_norm": 0.000993837951682508, | |
| "learning_rate": 6.079402397745653e-06, | |
| "loss": 0.0002, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.4256296530967845, | |
| "grad_norm": 0.036865074187517166, | |
| "learning_rate": 6.046054057659281e-06, | |
| "loss": 0.0004, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.4287977189925551, | |
| "grad_norm": 0.09351787716150284, | |
| "learning_rate": 6.012705717572909e-06, | |
| "loss": 0.0003, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.4319657848883256, | |
| "grad_norm": 0.00022034939320292324, | |
| "learning_rate": 5.979357377486536e-06, | |
| "loss": 0.0002, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.4351338507840963, | |
| "grad_norm": 0.002382364822551608, | |
| "learning_rate": 5.946009037400165e-06, | |
| "loss": 0.0002, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.438301916679867, | |
| "grad_norm": 0.0010514174355193973, | |
| "learning_rate": 5.912660697313791e-06, | |
| "loss": 0.0001, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.4414699825756376, | |
| "grad_norm": 0.0006282671820372343, | |
| "learning_rate": 5.8793123572274195e-06, | |
| "loss": 0.0002, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.444638048471408, | |
| "grad_norm": 0.0001003501529339701, | |
| "learning_rate": 5.845964017141047e-06, | |
| "loss": 0.0002, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.4478061143671788, | |
| "grad_norm": 0.0019161907257512212, | |
| "learning_rate": 5.812615677054675e-06, | |
| "loss": 0.0002, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.4509741802629494, | |
| "grad_norm": 0.0002770457649603486, | |
| "learning_rate": 5.779267336968303e-06, | |
| "loss": 0.0004, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.4541422461587201, | |
| "grad_norm": 0.001281541888602078, | |
| "learning_rate": 5.745918996881931e-06, | |
| "loss": 0.0003, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.4573103120544908, | |
| "grad_norm": 0.028755199164152145, | |
| "learning_rate": 5.712570656795558e-06, | |
| "loss": 0.0002, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.4604783779502615, | |
| "grad_norm": 0.0007570263114757836, | |
| "learning_rate": 5.679222316709187e-06, | |
| "loss": 0.0003, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.463646443846032, | |
| "grad_norm": 0.00423109345138073, | |
| "learning_rate": 5.645873976622814e-06, | |
| "loss": 0.0001, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.4668145097418026, | |
| "grad_norm": 5.3798950830241665e-05, | |
| "learning_rate": 5.612525636536442e-06, | |
| "loss": 0.0003, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.4699825756375733, | |
| "grad_norm": 7.343962352024391e-05, | |
| "learning_rate": 5.57917729645007e-06, | |
| "loss": 0.0003, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.473150641533344, | |
| "grad_norm": 0.025961237028241158, | |
| "learning_rate": 5.545828956363698e-06, | |
| "loss": 0.0002, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.4763187074291144, | |
| "grad_norm": 0.00014132962678559124, | |
| "learning_rate": 5.512480616277325e-06, | |
| "loss": 0.0002, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.479486773324885, | |
| "grad_norm": 0.0044896723702549934, | |
| "learning_rate": 5.479132276190953e-06, | |
| "loss": 0.0002, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.4826548392206558, | |
| "grad_norm": 5.546275497181341e-05, | |
| "learning_rate": 5.44578393610458e-06, | |
| "loss": 0.0002, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.4858229051164265, | |
| "grad_norm": 0.0015877331607043743, | |
| "learning_rate": 5.412435596018209e-06, | |
| "loss": 0.0001, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.4889909710121971, | |
| "grad_norm": 8.771561260800809e-05, | |
| "learning_rate": 5.379087255931836e-06, | |
| "loss": 0.0001, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.4921590369079678, | |
| "grad_norm": 9.971875260816887e-05, | |
| "learning_rate": 5.345738915845464e-06, | |
| "loss": 0.0001, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.4953271028037383, | |
| "grad_norm": 6.665828550467268e-05, | |
| "learning_rate": 5.312390575759092e-06, | |
| "loss": 0.0001, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.498495168699509, | |
| "grad_norm": 0.12465495616197586, | |
| "learning_rate": 5.27904223567272e-06, | |
| "loss": 0.0003, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.5016632345952796, | |
| "grad_norm": 0.00016497267642989755, | |
| "learning_rate": 5.2456938955863476e-06, | |
| "loss": 0.0001, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.50483130049105, | |
| "grad_norm": 0.000793833751231432, | |
| "learning_rate": 5.212345555499976e-06, | |
| "loss": 0.0001, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.5079993663868207, | |
| "grad_norm": 0.004431690089404583, | |
| "learning_rate": 5.178997215413603e-06, | |
| "loss": 0.0001, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.5111674322825914, | |
| "grad_norm": 8.146934123942629e-05, | |
| "learning_rate": 5.1456488753272316e-06, | |
| "loss": 0.0004, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.514335498178362, | |
| "grad_norm": 0.0032630818895995617, | |
| "learning_rate": 5.112300535240858e-06, | |
| "loss": 0.0003, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.5175035640741328, | |
| "grad_norm": 3.78349454877025e-06, | |
| "learning_rate": 5.0789521951544864e-06, | |
| "loss": 0.0002, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.5206716299699035, | |
| "grad_norm": 7.793370605213568e-05, | |
| "learning_rate": 5.045603855068114e-06, | |
| "loss": 0.0002, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.5238396958656741, | |
| "grad_norm": 1.0998847756127361e-05, | |
| "learning_rate": 5.012255514981742e-06, | |
| "loss": 0.0002, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.5270077617614446, | |
| "grad_norm": 7.101731171132997e-05, | |
| "learning_rate": 4.97890717489537e-06, | |
| "loss": 0.0002, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.5301758276572153, | |
| "grad_norm": 0.04721503704786301, | |
| "learning_rate": 4.945558834808998e-06, | |
| "loss": 0.0005, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.533343893552986, | |
| "grad_norm": 0.04013681039214134, | |
| "learning_rate": 4.912210494722625e-06, | |
| "loss": 0.0002, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.5365119594487564, | |
| "grad_norm": 0.15584056079387665, | |
| "learning_rate": 4.878862154636254e-06, | |
| "loss": 0.0002, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.539680025344527, | |
| "grad_norm": 0.008085441775619984, | |
| "learning_rate": 4.845513814549881e-06, | |
| "loss": 0.0002, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.5428480912402978, | |
| "grad_norm": 0.00025607392308302224, | |
| "learning_rate": 4.8121654744635085e-06, | |
| "loss": 0.0001, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.5460161571360684, | |
| "grad_norm": 0.009352591820061207, | |
| "learning_rate": 4.778817134377137e-06, | |
| "loss": 0.0001, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.5491842230318391, | |
| "grad_norm": 0.00013986592239234596, | |
| "learning_rate": 4.745468794290764e-06, | |
| "loss": 0.0001, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.5523522889276098, | |
| "grad_norm": 0.0005525100277736783, | |
| "learning_rate": 4.7121204542043925e-06, | |
| "loss": 0.0002, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.5555203548233805, | |
| "grad_norm": 0.002130384324118495, | |
| "learning_rate": 4.678772114118021e-06, | |
| "loss": 0.0003, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.558688420719151, | |
| "grad_norm": 0.00036302325315773487, | |
| "learning_rate": 4.645423774031647e-06, | |
| "loss": 0.0002, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.5618564866149216, | |
| "grad_norm": 2.546385076129809e-05, | |
| "learning_rate": 4.612075433945276e-06, | |
| "loss": 0.0002, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.565024552510692, | |
| "grad_norm": 0.0013008471578359604, | |
| "learning_rate": 4.578727093858903e-06, | |
| "loss": 0.0002, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.5681926184064627, | |
| "grad_norm": 0.0018331869505345821, | |
| "learning_rate": 4.545378753772531e-06, | |
| "loss": 0.0002, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.5713606843022334, | |
| "grad_norm": 0.00046941745677031577, | |
| "learning_rate": 4.51203041368616e-06, | |
| "loss": 0.0001, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.574528750198004, | |
| "grad_norm": 0.09666042774915695, | |
| "learning_rate": 4.478682073599787e-06, | |
| "loss": 0.0001, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.5776968160937748, | |
| "grad_norm": 0.000714512774720788, | |
| "learning_rate": 4.4453337335134145e-06, | |
| "loss": 0.0002, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.5808648819895454, | |
| "grad_norm": 0.001266203005798161, | |
| "learning_rate": 4.411985393427042e-06, | |
| "loss": 0.0002, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.5840329478853161, | |
| "grad_norm": 0.00010851142724277452, | |
| "learning_rate": 4.37863705334067e-06, | |
| "loss": 0.0001, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.5872010137810868, | |
| "grad_norm": 0.02388921193778515, | |
| "learning_rate": 4.3452887132542985e-06, | |
| "loss": 0.0001, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.5903690796768573, | |
| "grad_norm": 0.004322574008256197, | |
| "learning_rate": 4.311940373167926e-06, | |
| "loss": 0.0001, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.593537145572628, | |
| "grad_norm": 0.0002055590011877939, | |
| "learning_rate": 4.278592033081554e-06, | |
| "loss": 0.0008, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.5967052114683984, | |
| "grad_norm": 0.006136850919574499, | |
| "learning_rate": 4.245243692995181e-06, | |
| "loss": 0.0001, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.599873277364169, | |
| "grad_norm": 0.03570784255862236, | |
| "learning_rate": 4.211895352908809e-06, | |
| "loss": 0.0002, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.6030413432599397, | |
| "grad_norm": 0.00013556861085817218, | |
| "learning_rate": 4.178547012822437e-06, | |
| "loss": 0.0002, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.6062094091557104, | |
| "grad_norm": 0.00039382753311656415, | |
| "learning_rate": 4.145198672736065e-06, | |
| "loss": 0.0, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.609377475051481, | |
| "grad_norm": 0.014802640303969383, | |
| "learning_rate": 4.111850332649693e-06, | |
| "loss": 0.0001, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.6125455409472518, | |
| "grad_norm": 0.002426808699965477, | |
| "learning_rate": 4.0785019925633206e-06, | |
| "loss": 0.0001, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.6157136068430225, | |
| "grad_norm": 0.0027719761710613966, | |
| "learning_rate": 4.045153652476948e-06, | |
| "loss": 0.0003, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.6188816727387931, | |
| "grad_norm": 0.0003508755180519074, | |
| "learning_rate": 4.011805312390576e-06, | |
| "loss": 0.0007, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.6220497386345636, | |
| "grad_norm": 0.00042187023791484535, | |
| "learning_rate": 3.978456972304204e-06, | |
| "loss": 0.0001, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.6252178045303343, | |
| "grad_norm": 0.00010520713840378448, | |
| "learning_rate": 3.945108632217832e-06, | |
| "loss": 0.0001, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.6283858704261047, | |
| "grad_norm": 2.475667861290276e-05, | |
| "learning_rate": 3.9117602921314594e-06, | |
| "loss": 0.0002, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.6315539363218754, | |
| "grad_norm": 0.00042487168684601784, | |
| "learning_rate": 3.878411952045088e-06, | |
| "loss": 0.0002, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.634722002217646, | |
| "grad_norm": 0.0007512226002290845, | |
| "learning_rate": 3.845063611958715e-06, | |
| "loss": 0.0003, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.6378900681134168, | |
| "grad_norm": 0.0007686197641305625, | |
| "learning_rate": 3.8117152718723426e-06, | |
| "loss": 0.0002, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.6410581340091874, | |
| "grad_norm": 0.006279453635215759, | |
| "learning_rate": 3.7783669317859705e-06, | |
| "loss": 0.0002, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.644226199904958, | |
| "grad_norm": 5.683067865902558e-05, | |
| "learning_rate": 3.7450185916995983e-06, | |
| "loss": 0.0001, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.6473942658007288, | |
| "grad_norm": 0.08361367881298065, | |
| "learning_rate": 3.711670251613226e-06, | |
| "loss": 0.0002, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.6505623316964992, | |
| "grad_norm": 1.9708577394485474, | |
| "learning_rate": 3.678321911526854e-06, | |
| "loss": 0.0004, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.65373039759227, | |
| "grad_norm": 0.01258891262114048, | |
| "learning_rate": 3.6449735714404815e-06, | |
| "loss": 0.0002, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.6568984634880406, | |
| "grad_norm": 1.2053630598529708e-05, | |
| "learning_rate": 3.6116252313541093e-06, | |
| "loss": 0.0001, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.660066529383811, | |
| "grad_norm": 0.007339359261095524, | |
| "learning_rate": 3.578276891267737e-06, | |
| "loss": 0.0006, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.6632345952795817, | |
| "grad_norm": 7.470462151104584e-05, | |
| "learning_rate": 3.544928551181365e-06, | |
| "loss": 0.0003, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.6664026611753524, | |
| "grad_norm": 0.001069075195118785, | |
| "learning_rate": 3.5115802110949933e-06, | |
| "loss": 0.0001, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.669570727071123, | |
| "grad_norm": 0.0014466423308476806, | |
| "learning_rate": 3.478231871008621e-06, | |
| "loss": 0.0001, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.6727387929668938, | |
| "grad_norm": 0.0008716689771972597, | |
| "learning_rate": 3.4448835309222482e-06, | |
| "loss": 0.0001, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.6759068588626644, | |
| "grad_norm": 0.0006247049896046519, | |
| "learning_rate": 3.411535190835876e-06, | |
| "loss": 0.0002, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.6790749247584351, | |
| "grad_norm": 0.0016816813731566072, | |
| "learning_rate": 3.378186850749504e-06, | |
| "loss": 0.0001, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.6822429906542056, | |
| "grad_norm": 0.015677401795983315, | |
| "learning_rate": 3.3448385106631322e-06, | |
| "loss": 0.0001, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.6854110565499762, | |
| "grad_norm": 0.0020264824852347374, | |
| "learning_rate": 3.31149017057676e-06, | |
| "loss": 0.0003, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.6885791224457467, | |
| "grad_norm": 0.006521924398839474, | |
| "learning_rate": 3.278141830490388e-06, | |
| "loss": 0.0002, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.6917471883415174, | |
| "grad_norm": 0.055716466158628464, | |
| "learning_rate": 3.244793490404015e-06, | |
| "loss": 0.0001, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 0.0009364295983687043, | |
| "learning_rate": 3.2114451503176432e-06, | |
| "loss": 0.0002, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.6980833201330587, | |
| "grad_norm": 0.026155732572078705, | |
| "learning_rate": 3.178096810231271e-06, | |
| "loss": 0.0001, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.7012513860288294, | |
| "grad_norm": 0.0009285922278650105, | |
| "learning_rate": 3.144748470144899e-06, | |
| "loss": 0.0009, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.7044194519246, | |
| "grad_norm": 0.00019248783064540476, | |
| "learning_rate": 3.111400130058527e-06, | |
| "loss": 0.0002, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.7075875178203708, | |
| "grad_norm": 0.008935322985053062, | |
| "learning_rate": 3.0780517899721547e-06, | |
| "loss": 0.0002, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.7107555837161414, | |
| "grad_norm": 3.448131974437274e-05, | |
| "learning_rate": 3.044703449885782e-06, | |
| "loss": 0.0002, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.713923649611912, | |
| "grad_norm": 0.0037920591421425343, | |
| "learning_rate": 3.01135510979941e-06, | |
| "loss": 0.0001, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.7170917155076826, | |
| "grad_norm": 0.0004557653737720102, | |
| "learning_rate": 2.978006769713038e-06, | |
| "loss": 0.0001, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.720259781403453, | |
| "grad_norm": 0.001680429675616324, | |
| "learning_rate": 2.9446584296266657e-06, | |
| "loss": 0.0002, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.7234278472992237, | |
| "grad_norm": 0.006457743234932423, | |
| "learning_rate": 2.9113100895402936e-06, | |
| "loss": 0.0001, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.7265959131949944, | |
| "grad_norm": 0.0018418490653857589, | |
| "learning_rate": 2.8779617494539214e-06, | |
| "loss": 0.0002, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.729763979090765, | |
| "grad_norm": 0.012908555567264557, | |
| "learning_rate": 2.844613409367549e-06, | |
| "loss": 0.0001, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.7329320449865357, | |
| "grad_norm": 0.0021234566811472178, | |
| "learning_rate": 2.8112650692811767e-06, | |
| "loss": 0.0002, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.7361001108823064, | |
| "grad_norm": 0.3285054862499237, | |
| "learning_rate": 2.7779167291948046e-06, | |
| "loss": 0.0001, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.739268176778077, | |
| "grad_norm": 0.0007893216679804027, | |
| "learning_rate": 2.7445683891084324e-06, | |
| "loss": 0.0001, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.7424362426738478, | |
| "grad_norm": 0.02494579553604126, | |
| "learning_rate": 2.7112200490220603e-06, | |
| "loss": 0.0002, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.7456043085696182, | |
| "grad_norm": 0.0023814570158720016, | |
| "learning_rate": 2.677871708935688e-06, | |
| "loss": 0.0002, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.748772374465389, | |
| "grad_norm": 0.0058886525221168995, | |
| "learning_rate": 2.6445233688493156e-06, | |
| "loss": 0.0002, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.7519404403611594, | |
| "grad_norm": 4.876612001680769e-05, | |
| "learning_rate": 2.6111750287629435e-06, | |
| "loss": 0.0002, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.75510850625693, | |
| "grad_norm": 0.010443676263093948, | |
| "learning_rate": 2.5778266886765713e-06, | |
| "loss": 0.0002, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.7582765721527007, | |
| "grad_norm": 0.01249265018850565, | |
| "learning_rate": 2.544478348590199e-06, | |
| "loss": 0.0001, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.7614446380484714, | |
| "grad_norm": 0.00021050056966487318, | |
| "learning_rate": 2.511130008503827e-06, | |
| "loss": 0.0002, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.764612703944242, | |
| "grad_norm": 0.01633504591882229, | |
| "learning_rate": 2.4777816684174545e-06, | |
| "loss": 0.0001, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.7677807698400128, | |
| "grad_norm": 3.676281266962178e-05, | |
| "learning_rate": 2.4444333283310823e-06, | |
| "loss": 0.0003, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.7709488357357834, | |
| "grad_norm": 0.002830359386280179, | |
| "learning_rate": 2.4110849882447106e-06, | |
| "loss": 0.0001, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.7741169016315539, | |
| "grad_norm": 0.0029975976794958115, | |
| "learning_rate": 2.377736648158338e-06, | |
| "loss": 0.0002, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.7772849675273246, | |
| "grad_norm": 0.005194125231355429, | |
| "learning_rate": 2.344388308071966e-06, | |
| "loss": 0.0001, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.7804530334230952, | |
| "grad_norm": 7.272951734194066e-06, | |
| "learning_rate": 2.3110399679855938e-06, | |
| "loss": 0.0003, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.7836210993188657, | |
| "grad_norm": 0.0007229465409182012, | |
| "learning_rate": 2.2776916278992216e-06, | |
| "loss": 0.0002, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.7867891652146364, | |
| "grad_norm": 0.0005428678123280406, | |
| "learning_rate": 2.2443432878128495e-06, | |
| "loss": 0.0004, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.789957231110407, | |
| "grad_norm": 0.017834417521953583, | |
| "learning_rate": 2.2109949477264774e-06, | |
| "loss": 0.0002, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.7931252970061777, | |
| "grad_norm": 0.0003519799211062491, | |
| "learning_rate": 2.177646607640105e-06, | |
| "loss": 0.0002, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.7962933629019484, | |
| "grad_norm": 5.785848043160513e-05, | |
| "learning_rate": 2.1442982675537327e-06, | |
| "loss": 0.0001, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.799461428797719, | |
| "grad_norm": 0.00827944464981556, | |
| "learning_rate": 2.1109499274673605e-06, | |
| "loss": 0.0001, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.8026294946934898, | |
| "grad_norm": 8.96235360414721e-06, | |
| "learning_rate": 2.0776015873809884e-06, | |
| "loss": 0.0001, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.8057975605892602, | |
| "grad_norm": 8.50809519761242e-05, | |
| "learning_rate": 2.0442532472946162e-06, | |
| "loss": 0.0002, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.808965626485031, | |
| "grad_norm": 0.0564473532140255, | |
| "learning_rate": 2.010904907208244e-06, | |
| "loss": 0.0002, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.8121336923808016, | |
| "grad_norm": 0.0019106407416984439, | |
| "learning_rate": 1.9775565671218715e-06, | |
| "loss": 0.0002, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.815301758276572, | |
| "grad_norm": 0.0011765076778829098, | |
| "learning_rate": 1.9442082270354994e-06, | |
| "loss": 0.0002, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.8184698241723427, | |
| "grad_norm": 0.002034899080172181, | |
| "learning_rate": 1.9108598869491273e-06, | |
| "loss": 0.0001, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.8216378900681134, | |
| "grad_norm": 2.886955189751461e-05, | |
| "learning_rate": 1.877511546862755e-06, | |
| "loss": 0.0003, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.824805955963884, | |
| "grad_norm": 0.0003313591005280614, | |
| "learning_rate": 1.844163206776383e-06, | |
| "loss": 0.0001, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.8279740218596547, | |
| "grad_norm": 0.0030703512020409107, | |
| "learning_rate": 1.8108148666900108e-06, | |
| "loss": 0.0001, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.8311420877554254, | |
| "grad_norm": 0.05901242792606354, | |
| "learning_rate": 1.7774665266036383e-06, | |
| "loss": 0.0002, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.834310153651196, | |
| "grad_norm": 0.00020717663574032485, | |
| "learning_rate": 1.7441181865172664e-06, | |
| "loss": 0.0002, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.8374782195469666, | |
| "grad_norm": 3.810102498391643e-05, | |
| "learning_rate": 1.7107698464308942e-06, | |
| "loss": 0.0007, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.8406462854427372, | |
| "grad_norm": 0.00012426413013599813, | |
| "learning_rate": 1.6774215063445219e-06, | |
| "loss": 0.0001, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.8438143513385077, | |
| "grad_norm": 0.00743386335670948, | |
| "learning_rate": 1.6440731662581497e-06, | |
| "loss": 0.0001, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.8469824172342784, | |
| "grad_norm": 0.04067447409033775, | |
| "learning_rate": 1.6107248261717776e-06, | |
| "loss": 0.0001, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.850150483130049, | |
| "grad_norm": 2.0743360437336378e-05, | |
| "learning_rate": 1.5773764860854052e-06, | |
| "loss": 0.0001, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.8533185490258197, | |
| "grad_norm": 0.006036572623997927, | |
| "learning_rate": 1.544028145999033e-06, | |
| "loss": 0.0001, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.8564866149215904, | |
| "grad_norm": 0.0008050315082073212, | |
| "learning_rate": 1.510679805912661e-06, | |
| "loss": 0.0001, | |
| "step": 58600 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 63130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |